{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999679969277051, "eval_steps": 500, "global_step": 15623, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 31.41022733943927, "learning_rate": 2.1321961620469085e-08, "loss": 1.823, "step": 1 }, { "epoch": 0.0, "grad_norm": 27.925236905433984, "learning_rate": 4.264392324093817e-08, "loss": 1.8285, "step": 2 }, { "epoch": 0.0, "grad_norm": 31.34252860649552, "learning_rate": 6.396588486140725e-08, "loss": 1.6956, "step": 3 }, { "epoch": 0.0, "grad_norm": 42.678161888192804, "learning_rate": 8.528784648187634e-08, "loss": 1.6743, "step": 4 }, { "epoch": 0.0, "grad_norm": 37.424956436172494, "learning_rate": 1.0660980810234542e-07, "loss": 1.9077, "step": 5 }, { "epoch": 0.0, "grad_norm": 39.677112848407006, "learning_rate": 1.279317697228145e-07, "loss": 1.821, "step": 6 }, { "epoch": 0.0, "grad_norm": 82.22009714472784, "learning_rate": 1.4925373134328358e-07, "loss": 1.7193, "step": 7 }, { "epoch": 0.0, "grad_norm": 25.12998112464896, "learning_rate": 1.7057569296375268e-07, "loss": 1.6946, "step": 8 }, { "epoch": 0.0, "grad_norm": 125.19650502426776, "learning_rate": 1.918976545842218e-07, "loss": 1.913, "step": 9 }, { "epoch": 0.0, "grad_norm": 24.530448045368182, "learning_rate": 2.1321961620469084e-07, "loss": 1.7912, "step": 10 }, { "epoch": 0.0, "grad_norm": 32.42446618154666, "learning_rate": 2.3454157782515995e-07, "loss": 1.753, "step": 11 }, { "epoch": 0.0, "grad_norm": 25.32672276931106, "learning_rate": 2.55863539445629e-07, "loss": 1.7533, "step": 12 }, { "epoch": 0.0, "grad_norm": 65.25572139053148, "learning_rate": 2.771855010660981e-07, "loss": 1.7959, "step": 13 }, { "epoch": 0.0, "grad_norm": 41.842276838645915, "learning_rate": 2.9850746268656716e-07, "loss": 1.7559, "step": 14 }, { "epoch": 0.0, "grad_norm": 21.21712825758727, "learning_rate": 3.1982942430703626e-07, "loss": 1.6745, "step": 15 }, { "epoch": 0.0, "grad_norm": 67.18827228290024, "learning_rate": 3.4115138592750537e-07, "loss": 1.7763, "step": 16 }, { "epoch": 0.0, "grad_norm": 25.35354936697425, "learning_rate": 3.624733475479744e-07, "loss": 1.9156, "step": 17 }, { "epoch": 0.0, "grad_norm": 50.41118419933061, "learning_rate": 3.837953091684436e-07, "loss": 1.7181, "step": 18 }, { "epoch": 0.0, "grad_norm": 657.3593501288865, "learning_rate": 4.0511727078891263e-07, "loss": 1.6839, "step": 19 }, { "epoch": 0.0, "grad_norm": 46.693055142499084, "learning_rate": 4.264392324093817e-07, "loss": 1.7178, "step": 20 }, { "epoch": 0.0, "grad_norm": 21.364785622613102, "learning_rate": 4.4776119402985074e-07, "loss": 1.7108, "step": 21 }, { "epoch": 0.0, "grad_norm": 120.47845784969898, "learning_rate": 4.690831556503199e-07, "loss": 1.6712, "step": 22 }, { "epoch": 0.0, "grad_norm": 35.38027323636389, "learning_rate": 4.904051172707889e-07, "loss": 1.7657, "step": 23 }, { "epoch": 0.0, "grad_norm": 32.93934446544091, "learning_rate": 5.11727078891258e-07, "loss": 1.7056, "step": 24 }, { "epoch": 0.0, "grad_norm": 176.59293559338678, "learning_rate": 5.33049040511727e-07, "loss": 1.6422, "step": 25 }, { "epoch": 0.0, "grad_norm": 37.27120124018048, "learning_rate": 5.543710021321962e-07, "loss": 1.7546, "step": 26 }, { "epoch": 0.0, "grad_norm": 26.673149402372538, "learning_rate": 5.756929637526653e-07, "loss": 1.6864, "step": 27 }, { "epoch": 0.0, "grad_norm": 21.304305694967955, "learning_rate": 5.970149253731343e-07, "loss": 1.6843, "step": 28 }, { "epoch": 0.0, "grad_norm": 34.34113075202021, "learning_rate": 6.183368869936035e-07, "loss": 1.7185, "step": 29 }, { "epoch": 0.0, "grad_norm": 42.87510101138674, "learning_rate": 6.396588486140725e-07, "loss": 1.5912, "step": 30 }, { "epoch": 0.0, "grad_norm": 9.28592995857262, "learning_rate": 6.609808102345417e-07, "loss": 0.8613, "step": 31 }, { "epoch": 0.0, "grad_norm": 30.525569264982295, "learning_rate": 6.823027718550107e-07, "loss": 1.6817, "step": 32 }, { "epoch": 0.0, "grad_norm": 7.614880251844668, "learning_rate": 7.036247334754798e-07, "loss": 0.8444, "step": 33 }, { "epoch": 0.0, "grad_norm": 20.361990630916985, "learning_rate": 7.249466950959488e-07, "loss": 1.5247, "step": 34 }, { "epoch": 0.0, "grad_norm": 39.08935104805929, "learning_rate": 7.462686567164179e-07, "loss": 1.6458, "step": 35 }, { "epoch": 0.0, "grad_norm": 7.462235887029643, "learning_rate": 7.675906183368872e-07, "loss": 0.784, "step": 36 }, { "epoch": 0.0, "grad_norm": 85.71955881997027, "learning_rate": 7.889125799573562e-07, "loss": 1.6072, "step": 37 }, { "epoch": 0.0, "grad_norm": 8.752352094444852, "learning_rate": 8.102345415778253e-07, "loss": 0.9488, "step": 38 }, { "epoch": 0.0, "grad_norm": 39.309129377433834, "learning_rate": 8.315565031982943e-07, "loss": 1.4629, "step": 39 }, { "epoch": 0.0, "grad_norm": 25.302918281703377, "learning_rate": 8.528784648187634e-07, "loss": 1.3964, "step": 40 }, { "epoch": 0.0, "grad_norm": 35.074163820266584, "learning_rate": 8.742004264392324e-07, "loss": 1.4671, "step": 41 }, { "epoch": 0.0, "grad_norm": 37.652652325096504, "learning_rate": 8.955223880597015e-07, "loss": 1.4633, "step": 42 }, { "epoch": 0.0, "grad_norm": 44.87933549027052, "learning_rate": 9.168443496801707e-07, "loss": 1.4239, "step": 43 }, { "epoch": 0.0, "grad_norm": 16.682937297063752, "learning_rate": 9.381663113006398e-07, "loss": 1.2409, "step": 44 }, { "epoch": 0.0, "grad_norm": 23.215874987444487, "learning_rate": 9.594882729211088e-07, "loss": 1.368, "step": 45 }, { "epoch": 0.0, "grad_norm": 10.386016003451305, "learning_rate": 9.808102345415779e-07, "loss": 1.2163, "step": 46 }, { "epoch": 0.0, "grad_norm": 9.253242264780551, "learning_rate": 1.002132196162047e-06, "loss": 1.3103, "step": 47 }, { "epoch": 0.0, "grad_norm": 12.743653936054306, "learning_rate": 1.023454157782516e-06, "loss": 1.2674, "step": 48 }, { "epoch": 0.0, "grad_norm": 3.1425604575326656, "learning_rate": 1.044776119402985e-06, "loss": 0.7901, "step": 49 }, { "epoch": 0.0, "grad_norm": 77.43293796841492, "learning_rate": 1.066098081023454e-06, "loss": 1.2249, "step": 50 }, { "epoch": 0.0, "grad_norm": 6.556683225801615, "learning_rate": 1.0874200426439234e-06, "loss": 1.2956, "step": 51 }, { "epoch": 0.0, "grad_norm": 24.195872846460514, "learning_rate": 1.1087420042643924e-06, "loss": 1.148, "step": 52 }, { "epoch": 0.0, "grad_norm": 6.4180029423931035, "learning_rate": 1.1300639658848615e-06, "loss": 1.0952, "step": 53 }, { "epoch": 0.0, "grad_norm": 29.289196796412394, "learning_rate": 1.1513859275053305e-06, "loss": 1.2816, "step": 54 }, { "epoch": 0.0, "grad_norm": 26.533185450658408, "learning_rate": 1.1727078891257996e-06, "loss": 1.1627, "step": 55 }, { "epoch": 0.0, "grad_norm": 44.48286240161451, "learning_rate": 1.1940298507462686e-06, "loss": 1.1846, "step": 56 }, { "epoch": 0.0, "grad_norm": 34.784502912609305, "learning_rate": 1.2153518123667379e-06, "loss": 1.2311, "step": 57 }, { "epoch": 0.0, "grad_norm": 35.360654058705435, "learning_rate": 1.236673773987207e-06, "loss": 1.0898, "step": 58 }, { "epoch": 0.0, "grad_norm": 33.831288932833246, "learning_rate": 1.257995735607676e-06, "loss": 1.1662, "step": 59 }, { "epoch": 0.0, "grad_norm": 12.135129941258896, "learning_rate": 1.279317697228145e-06, "loss": 1.1669, "step": 60 }, { "epoch": 0.0, "grad_norm": 5.279296705895599, "learning_rate": 1.300639658848614e-06, "loss": 1.1678, "step": 61 }, { "epoch": 0.0, "grad_norm": 25.912309885936263, "learning_rate": 1.3219616204690834e-06, "loss": 1.1715, "step": 62 }, { "epoch": 0.0, "grad_norm": 13.622110748894741, "learning_rate": 1.3432835820895524e-06, "loss": 1.1885, "step": 63 }, { "epoch": 0.0, "grad_norm": 49.500678035001215, "learning_rate": 1.3646055437100215e-06, "loss": 1.0938, "step": 64 }, { "epoch": 0.0, "grad_norm": 10.926656652212772, "learning_rate": 1.3859275053304905e-06, "loss": 1.1464, "step": 65 }, { "epoch": 0.0, "grad_norm": 2.8649768415527013, "learning_rate": 1.4072494669509596e-06, "loss": 0.6268, "step": 66 }, { "epoch": 0.0, "grad_norm": 7.19490544742673, "learning_rate": 1.4285714285714286e-06, "loss": 1.0591, "step": 67 }, { "epoch": 0.0, "grad_norm": 6.435210500160962, "learning_rate": 1.4498933901918977e-06, "loss": 1.146, "step": 68 }, { "epoch": 0.0, "grad_norm": 5.4240890772487935, "learning_rate": 1.4712153518123667e-06, "loss": 1.0325, "step": 69 }, { "epoch": 0.0, "grad_norm": 12.162755828484105, "learning_rate": 1.4925373134328358e-06, "loss": 1.0471, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.6204511467135163, "learning_rate": 1.5138592750533053e-06, "loss": 0.7443, "step": 71 }, { "epoch": 0.0, "grad_norm": 7.075049942876415, "learning_rate": 1.5351812366737743e-06, "loss": 1.0145, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.8072849553162433, "learning_rate": 1.5565031982942434e-06, "loss": 0.6973, "step": 73 }, { "epoch": 0.0, "grad_norm": 22.1810836670785, "learning_rate": 1.5778251599147124e-06, "loss": 1.1723, "step": 74 }, { "epoch": 0.0, "grad_norm": 53.16500914334074, "learning_rate": 1.5991471215351815e-06, "loss": 1.0783, "step": 75 }, { "epoch": 0.0, "grad_norm": 32.926288573044886, "learning_rate": 1.6204690831556505e-06, "loss": 1.1048, "step": 76 }, { "epoch": 0.0, "grad_norm": 18.33504774804005, "learning_rate": 1.6417910447761196e-06, "loss": 1.0619, "step": 77 }, { "epoch": 0.0, "grad_norm": 2.1649941015910668, "learning_rate": 1.6631130063965886e-06, "loss": 0.6948, "step": 78 }, { "epoch": 0.01, "grad_norm": 4.058474485656322, "learning_rate": 1.6844349680170577e-06, "loss": 0.9967, "step": 79 }, { "epoch": 0.01, "grad_norm": 15.959200986763562, "learning_rate": 1.7057569296375267e-06, "loss": 0.998, "step": 80 }, { "epoch": 0.01, "grad_norm": 2.2351518294336206, "learning_rate": 1.7270788912579958e-06, "loss": 0.8142, "step": 81 }, { "epoch": 0.01, "grad_norm": 5.580861783455842, "learning_rate": 1.7484008528784648e-06, "loss": 1.0184, "step": 82 }, { "epoch": 0.01, "grad_norm": 3.858891900087226, "learning_rate": 1.7697228144989339e-06, "loss": 1.0999, "step": 83 }, { "epoch": 0.01, "grad_norm": 7.915060802393804, "learning_rate": 1.791044776119403e-06, "loss": 1.0843, "step": 84 }, { "epoch": 0.01, "grad_norm": 9.980398233822148, "learning_rate": 1.812366737739872e-06, "loss": 1.0089, "step": 85 }, { "epoch": 0.01, "grad_norm": 9.233636097653667, "learning_rate": 1.8336886993603415e-06, "loss": 1.0904, "step": 86 }, { "epoch": 0.01, "grad_norm": 43.22581632739323, "learning_rate": 1.8550106609808105e-06, "loss": 1.1406, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.9949724571337697, "learning_rate": 1.8763326226012796e-06, "loss": 0.7556, "step": 88 }, { "epoch": 0.01, "grad_norm": 3.8965848625606614, "learning_rate": 1.8976545842217486e-06, "loss": 0.9571, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.866264834781842, "learning_rate": 1.9189765458422177e-06, "loss": 1.0393, "step": 90 }, { "epoch": 0.01, "grad_norm": 6.309517228869182, "learning_rate": 1.9402985074626867e-06, "loss": 1.061, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.6642207490227008, "learning_rate": 1.9616204690831558e-06, "loss": 0.6389, "step": 92 }, { "epoch": 0.01, "grad_norm": 3.170587024413656, "learning_rate": 1.982942430703625e-06, "loss": 0.9664, "step": 93 }, { "epoch": 0.01, "grad_norm": 33.62163633200956, "learning_rate": 2.004264392324094e-06, "loss": 1.0536, "step": 94 }, { "epoch": 0.01, "grad_norm": 17.72193685431109, "learning_rate": 2.025586353944563e-06, "loss": 0.9983, "step": 95 }, { "epoch": 0.01, "grad_norm": 3.621753063769003, "learning_rate": 2.046908315565032e-06, "loss": 1.0225, "step": 96 }, { "epoch": 0.01, "grad_norm": 7.229557943534115, "learning_rate": 2.068230277185501e-06, "loss": 1.0008, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.811459167084552, "learning_rate": 2.08955223880597e-06, "loss": 0.6689, "step": 98 }, { "epoch": 0.01, "grad_norm": 10.91091809682377, "learning_rate": 2.110874200426439e-06, "loss": 1.0633, "step": 99 }, { "epoch": 0.01, "grad_norm": 11.279654904838074, "learning_rate": 2.132196162046908e-06, "loss": 1.0068, "step": 100 }, { "epoch": 0.01, "grad_norm": 5.349882587622066, "learning_rate": 2.1535181236673773e-06, "loss": 1.0129, "step": 101 }, { "epoch": 0.01, "grad_norm": 9.465862104867504, "learning_rate": 2.1748400852878467e-06, "loss": 1.0255, "step": 102 }, { "epoch": 0.01, "grad_norm": 14.1771428092429, "learning_rate": 2.1961620469083158e-06, "loss": 1.0192, "step": 103 }, { "epoch": 0.01, "grad_norm": 5.173592011367369, "learning_rate": 2.217484008528785e-06, "loss": 0.9409, "step": 104 }, { "epoch": 0.01, "grad_norm": 3.195435956532825, "learning_rate": 2.238805970149254e-06, "loss": 1.0181, "step": 105 }, { "epoch": 0.01, "grad_norm": 10.566586415111502, "learning_rate": 2.260127931769723e-06, "loss": 1.1205, "step": 106 }, { "epoch": 0.01, "grad_norm": 6.561892968497346, "learning_rate": 2.281449893390192e-06, "loss": 1.0343, "step": 107 }, { "epoch": 0.01, "grad_norm": 5.557762838583296, "learning_rate": 2.302771855010661e-06, "loss": 1.019, "step": 108 }, { "epoch": 0.01, "grad_norm": 9.205265432249757, "learning_rate": 2.32409381663113e-06, "loss": 1.0563, "step": 109 }, { "epoch": 0.01, "grad_norm": 5.851143459511912, "learning_rate": 2.345415778251599e-06, "loss": 1.0055, "step": 110 }, { "epoch": 0.01, "grad_norm": 6.93330653121349, "learning_rate": 2.366737739872068e-06, "loss": 1.0101, "step": 111 }, { "epoch": 0.01, "grad_norm": 8.854123780409887, "learning_rate": 2.3880597014925373e-06, "loss": 1.0497, "step": 112 }, { "epoch": 0.01, "grad_norm": 2.5393340201820855, "learning_rate": 2.4093816631130067e-06, "loss": 1.0215, "step": 113 }, { "epoch": 0.01, "grad_norm": 5.02261362528784, "learning_rate": 2.4307036247334758e-06, "loss": 0.9085, "step": 114 }, { "epoch": 0.01, "grad_norm": 7.4123314657794115, "learning_rate": 2.452025586353945e-06, "loss": 0.9933, "step": 115 }, { "epoch": 0.01, "grad_norm": 4.595898374167055, "learning_rate": 2.473347547974414e-06, "loss": 0.9811, "step": 116 }, { "epoch": 0.01, "grad_norm": 5.605220479853014, "learning_rate": 2.494669509594883e-06, "loss": 1.0537, "step": 117 }, { "epoch": 0.01, "grad_norm": 7.78394543732505, "learning_rate": 2.515991471215352e-06, "loss": 1.0065, "step": 118 }, { "epoch": 0.01, "grad_norm": 5.396979069561824, "learning_rate": 2.537313432835821e-06, "loss": 1.0448, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.3546480836436119, "learning_rate": 2.55863539445629e-06, "loss": 0.6352, "step": 120 }, { "epoch": 0.01, "grad_norm": 4.141321683141333, "learning_rate": 2.579957356076759e-06, "loss": 0.9532, "step": 121 }, { "epoch": 0.01, "grad_norm": 2.942087529097167, "learning_rate": 2.601279317697228e-06, "loss": 0.9704, "step": 122 }, { "epoch": 0.01, "grad_norm": 3.9379572113825803, "learning_rate": 2.6226012793176977e-06, "loss": 1.0845, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.4834857460518542, "learning_rate": 2.6439232409381667e-06, "loss": 0.7469, "step": 124 }, { "epoch": 0.01, "grad_norm": 12.495417706599296, "learning_rate": 2.6652452025586358e-06, "loss": 0.9762, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.782569817565175, "learning_rate": 2.686567164179105e-06, "loss": 0.7201, "step": 126 }, { "epoch": 0.01, "grad_norm": 12.608060537892987, "learning_rate": 2.707889125799574e-06, "loss": 0.9993, "step": 127 }, { "epoch": 0.01, "grad_norm": 7.22261956507977, "learning_rate": 2.729211087420043e-06, "loss": 0.9944, "step": 128 }, { "epoch": 0.01, "grad_norm": 2.306910122817937, "learning_rate": 2.750533049040512e-06, "loss": 0.8656, "step": 129 }, { "epoch": 0.01, "grad_norm": 87.33663020932856, "learning_rate": 2.771855010660981e-06, "loss": 1.008, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.6647508150941457, "learning_rate": 2.79317697228145e-06, "loss": 0.6404, "step": 131 }, { "epoch": 0.01, "grad_norm": 5.253735146969837, "learning_rate": 2.814498933901919e-06, "loss": 0.9608, "step": 132 }, { "epoch": 0.01, "grad_norm": 2.746184555344314, "learning_rate": 2.835820895522388e-06, "loss": 1.0626, "step": 133 }, { "epoch": 0.01, "grad_norm": 7.116154851329462, "learning_rate": 2.8571428571428573e-06, "loss": 1.1073, "step": 134 }, { "epoch": 0.01, "grad_norm": 9.232450606558086, "learning_rate": 2.8784648187633263e-06, "loss": 1.0142, "step": 135 }, { "epoch": 0.01, "grad_norm": 14.957908236398374, "learning_rate": 2.8997867803837954e-06, "loss": 1.1418, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.5327165140314858, "learning_rate": 2.9211087420042644e-06, "loss": 0.6501, "step": 137 }, { "epoch": 0.01, "grad_norm": 7.138536579042225, "learning_rate": 2.9424307036247335e-06, "loss": 1.0016, "step": 138 }, { "epoch": 0.01, "grad_norm": 2.482383549721096, "learning_rate": 2.9637526652452025e-06, "loss": 1.0162, "step": 139 }, { "epoch": 0.01, "grad_norm": 14.719818834880009, "learning_rate": 2.9850746268656716e-06, "loss": 1.2091, "step": 140 }, { "epoch": 0.01, "grad_norm": 4.190017018817901, "learning_rate": 3.006396588486141e-06, "loss": 0.9629, "step": 141 }, { "epoch": 0.01, "grad_norm": 17.828077059069233, "learning_rate": 3.0277185501066105e-06, "loss": 0.9709, "step": 142 }, { "epoch": 0.01, "grad_norm": 4.487833125950818, "learning_rate": 3.0490405117270796e-06, "loss": 0.9749, "step": 143 }, { "epoch": 0.01, "grad_norm": 15.328508988112867, "learning_rate": 3.0703624733475486e-06, "loss": 0.9346, "step": 144 }, { "epoch": 0.01, "grad_norm": 13.346811588449016, "learning_rate": 3.0916844349680177e-06, "loss": 0.9141, "step": 145 }, { "epoch": 0.01, "grad_norm": 7.134426923151666, "learning_rate": 3.1130063965884867e-06, "loss": 0.9702, "step": 146 }, { "epoch": 0.01, "grad_norm": 8.846143363165087, "learning_rate": 3.1343283582089558e-06, "loss": 1.0572, "step": 147 }, { "epoch": 0.01, "grad_norm": 11.879484556768835, "learning_rate": 3.155650319829425e-06, "loss": 0.9215, "step": 148 }, { "epoch": 0.01, "grad_norm": 4.278747791234565, "learning_rate": 3.176972281449894e-06, "loss": 0.9328, "step": 149 }, { "epoch": 0.01, "grad_norm": 2.2620565432073043, "learning_rate": 3.198294243070363e-06, "loss": 1.0677, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.3083416585222918, "learning_rate": 3.219616204690832e-06, "loss": 0.6074, "step": 151 }, { "epoch": 0.01, "grad_norm": 5.041558619346638, "learning_rate": 3.240938166311301e-06, "loss": 0.9517, "step": 152 }, { "epoch": 0.01, "grad_norm": 7.140714045469368, "learning_rate": 3.26226012793177e-06, "loss": 1.0139, "step": 153 }, { "epoch": 0.01, "grad_norm": 11.028352230118362, "learning_rate": 3.283582089552239e-06, "loss": 0.9418, "step": 154 }, { "epoch": 0.01, "grad_norm": 5.976307096528054, "learning_rate": 3.304904051172708e-06, "loss": 0.9579, "step": 155 }, { "epoch": 0.01, "grad_norm": 6.877768561182632, "learning_rate": 3.3262260127931773e-06, "loss": 0.9728, "step": 156 }, { "epoch": 0.01, "grad_norm": 3.031439016149461, "learning_rate": 3.3475479744136463e-06, "loss": 0.8744, "step": 157 }, { "epoch": 0.01, "grad_norm": 2.1891958527239788, "learning_rate": 3.3688699360341154e-06, "loss": 0.94, "step": 158 }, { "epoch": 0.01, "grad_norm": 2.302694547149424, "learning_rate": 3.3901918976545844e-06, "loss": 0.8558, "step": 159 }, { "epoch": 0.01, "grad_norm": 6.90397503586972, "learning_rate": 3.4115138592750535e-06, "loss": 1.0303, "step": 160 }, { "epoch": 0.01, "grad_norm": 2.0787009827436584, "learning_rate": 3.4328358208955225e-06, "loss": 0.9023, "step": 161 }, { "epoch": 0.01, "grad_norm": 8.339023799458541, "learning_rate": 3.4541577825159916e-06, "loss": 0.9327, "step": 162 }, { "epoch": 0.01, "grad_norm": 12.166027663938856, "learning_rate": 3.4754797441364606e-06, "loss": 0.9239, "step": 163 }, { "epoch": 0.01, "grad_norm": 2.257170640828908, "learning_rate": 3.4968017057569297e-06, "loss": 0.9919, "step": 164 }, { "epoch": 0.01, "grad_norm": 1.4657070106580508, "learning_rate": 3.5181236673773987e-06, "loss": 0.7076, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.92713074202233, "learning_rate": 3.5394456289978678e-06, "loss": 0.6344, "step": 166 }, { "epoch": 0.01, "grad_norm": 6.541465520630148, "learning_rate": 3.560767590618337e-06, "loss": 0.9272, "step": 167 }, { "epoch": 0.01, "grad_norm": 4.007312616201161, "learning_rate": 3.582089552238806e-06, "loss": 0.9567, "step": 168 }, { "epoch": 0.01, "grad_norm": 8.170957010966816, "learning_rate": 3.603411513859275e-06, "loss": 0.9102, "step": 169 }, { "epoch": 0.01, "grad_norm": 6.2730640574382575, "learning_rate": 3.624733475479744e-06, "loss": 0.9781, "step": 170 }, { "epoch": 0.01, "grad_norm": 5.466204425757793, "learning_rate": 3.6460554371002135e-06, "loss": 0.9287, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.1666271915553152, "learning_rate": 3.667377398720683e-06, "loss": 0.6304, "step": 172 }, { "epoch": 0.01, "grad_norm": 9.671808198946136, "learning_rate": 3.688699360341152e-06, "loss": 1.042, "step": 173 }, { "epoch": 0.01, "grad_norm": 3.023012648226756, "learning_rate": 3.710021321961621e-06, "loss": 0.9452, "step": 174 }, { "epoch": 0.01, "grad_norm": 4.668060589932627, "learning_rate": 3.73134328358209e-06, "loss": 0.933, "step": 175 }, { "epoch": 0.01, "grad_norm": 6.292747945409089, "learning_rate": 3.752665245202559e-06, "loss": 0.8538, "step": 176 }, { "epoch": 0.01, "grad_norm": 4.560755098057, "learning_rate": 3.773987206823028e-06, "loss": 0.9509, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.4499243097034462, "learning_rate": 3.7953091684434973e-06, "loss": 0.6698, "step": 178 }, { "epoch": 0.01, "grad_norm": 2.7968960857066394, "learning_rate": 3.816631130063966e-06, "loss": 0.8723, "step": 179 }, { "epoch": 0.01, "grad_norm": 15.099235269353493, "learning_rate": 3.837953091684435e-06, "loss": 0.965, "step": 180 }, { "epoch": 0.01, "grad_norm": 4.343335510795924, "learning_rate": 3.859275053304904e-06, "loss": 0.8759, "step": 181 }, { "epoch": 0.01, "grad_norm": 3.1969481750378264, "learning_rate": 3.8805970149253735e-06, "loss": 0.919, "step": 182 }, { "epoch": 0.01, "grad_norm": 3.6124855269361813, "learning_rate": 3.9019189765458425e-06, "loss": 0.9083, "step": 183 }, { "epoch": 0.01, "grad_norm": 13.882857295364463, "learning_rate": 3.9232409381663116e-06, "loss": 0.9972, "step": 184 }, { "epoch": 0.01, "grad_norm": 2.955090336839954, "learning_rate": 3.944562899786781e-06, "loss": 0.9468, "step": 185 }, { "epoch": 0.01, "grad_norm": 3.003254722846294, "learning_rate": 3.96588486140725e-06, "loss": 0.8822, "step": 186 }, { "epoch": 0.01, "grad_norm": 1.4192052860756614, "learning_rate": 3.987206823027719e-06, "loss": 0.7968, "step": 187 }, { "epoch": 0.01, "grad_norm": 6.225249207590155, "learning_rate": 4.008528784648188e-06, "loss": 0.9961, "step": 188 }, { "epoch": 0.01, "grad_norm": 4.156293014339058, "learning_rate": 4.029850746268657e-06, "loss": 0.8355, "step": 189 }, { "epoch": 0.01, "grad_norm": 6.983661725659429, "learning_rate": 4.051172707889126e-06, "loss": 1.0318, "step": 190 }, { "epoch": 0.01, "grad_norm": 13.125133393111614, "learning_rate": 4.072494669509595e-06, "loss": 1.1135, "step": 191 }, { "epoch": 0.01, "grad_norm": 7.729088806650626, "learning_rate": 4.093816631130064e-06, "loss": 0.9531, "step": 192 }, { "epoch": 0.01, "grad_norm": 3.908046025443563, "learning_rate": 4.115138592750533e-06, "loss": 1.0225, "step": 193 }, { "epoch": 0.01, "grad_norm": 5.6855003715748955, "learning_rate": 4.136460554371002e-06, "loss": 0.9306, "step": 194 }, { "epoch": 0.01, "grad_norm": 4.499616585364674, "learning_rate": 4.157782515991471e-06, "loss": 0.8996, "step": 195 }, { "epoch": 0.01, "grad_norm": 2.9615975048743017, "learning_rate": 4.17910447761194e-06, "loss": 0.9079, "step": 196 }, { "epoch": 0.01, "grad_norm": 1.39577172002921, "learning_rate": 4.200426439232409e-06, "loss": 0.771, "step": 197 }, { "epoch": 0.01, "grad_norm": 3.0166632178227317, "learning_rate": 4.221748400852878e-06, "loss": 0.9673, "step": 198 }, { "epoch": 0.01, "grad_norm": 4.302850556968944, "learning_rate": 4.243070362473347e-06, "loss": 0.9225, "step": 199 }, { "epoch": 0.01, "grad_norm": 3.0695409581073747, "learning_rate": 4.264392324093816e-06, "loss": 0.9573, "step": 200 }, { "epoch": 0.01, "grad_norm": 17.115783405931975, "learning_rate": 4.2857142857142855e-06, "loss": 0.8306, "step": 201 }, { "epoch": 0.01, "grad_norm": 3.4228812972376255, "learning_rate": 4.3070362473347545e-06, "loss": 0.8803, "step": 202 }, { "epoch": 0.01, "grad_norm": 3.9217484634995725, "learning_rate": 4.3283582089552236e-06, "loss": 0.9313, "step": 203 }, { "epoch": 0.01, "grad_norm": 4.035135405546231, "learning_rate": 4.3496801705756935e-06, "loss": 0.9132, "step": 204 }, { "epoch": 0.01, "grad_norm": 5.315187524436118, "learning_rate": 4.3710021321961625e-06, "loss": 0.9685, "step": 205 }, { "epoch": 0.01, "grad_norm": 6.843101231920591, "learning_rate": 4.3923240938166316e-06, "loss": 0.8867, "step": 206 }, { "epoch": 0.01, "grad_norm": 1.4906163283109912, "learning_rate": 4.413646055437101e-06, "loss": 0.6321, "step": 207 }, { "epoch": 0.01, "grad_norm": 1.2540958833216937, "learning_rate": 4.43496801705757e-06, "loss": 0.6616, "step": 208 }, { "epoch": 0.01, "grad_norm": 6.551405958216888, "learning_rate": 4.456289978678039e-06, "loss": 0.9394, "step": 209 }, { "epoch": 0.01, "grad_norm": 1.8679448634514464, "learning_rate": 4.477611940298508e-06, "loss": 0.8898, "step": 210 }, { "epoch": 0.01, "grad_norm": 5.543215975703094, "learning_rate": 4.498933901918977e-06, "loss": 0.9663, "step": 211 }, { "epoch": 0.01, "grad_norm": 2.845750393980758, "learning_rate": 4.520255863539446e-06, "loss": 1.0041, "step": 212 }, { "epoch": 0.01, "grad_norm": 4.5852331059536775, "learning_rate": 4.541577825159915e-06, "loss": 0.9291, "step": 213 }, { "epoch": 0.01, "grad_norm": 2.143941800655154, "learning_rate": 4.562899786780384e-06, "loss": 0.971, "step": 214 }, { "epoch": 0.01, "grad_norm": 4.116048048117803, "learning_rate": 4.584221748400853e-06, "loss": 0.9209, "step": 215 }, { "epoch": 0.01, "grad_norm": 3.2952520427478778, "learning_rate": 4.605543710021322e-06, "loss": 0.9064, "step": 216 }, { "epoch": 0.01, "grad_norm": 7.100308770507937, "learning_rate": 4.626865671641791e-06, "loss": 0.9953, "step": 217 }, { "epoch": 0.01, "grad_norm": 5.1544642823400215, "learning_rate": 4.64818763326226e-06, "loss": 0.8968, "step": 218 }, { "epoch": 0.01, "grad_norm": 1.2226417146057313, "learning_rate": 4.669509594882729e-06, "loss": 0.6608, "step": 219 }, { "epoch": 0.01, "grad_norm": 7.99243688303641, "learning_rate": 4.690831556503198e-06, "loss": 0.9958, "step": 220 }, { "epoch": 0.01, "grad_norm": 11.095583076112204, "learning_rate": 4.712153518123667e-06, "loss": 0.9108, "step": 221 }, { "epoch": 0.01, "grad_norm": 5.66875633498436, "learning_rate": 4.733475479744136e-06, "loss": 0.8628, "step": 222 }, { "epoch": 0.01, "grad_norm": 2.029177750858604, "learning_rate": 4.7547974413646055e-06, "loss": 0.9265, "step": 223 }, { "epoch": 0.01, "grad_norm": 2.3956356012420987, "learning_rate": 4.7761194029850745e-06, "loss": 0.9056, "step": 224 }, { "epoch": 0.01, "grad_norm": 1.3033854549615078, "learning_rate": 4.797441364605544e-06, "loss": 0.6413, "step": 225 }, { "epoch": 0.01, "grad_norm": 5.508124093751844, "learning_rate": 4.8187633262260135e-06, "loss": 0.902, "step": 226 }, { "epoch": 0.01, "grad_norm": 17.169444675829684, "learning_rate": 4.8400852878464825e-06, "loss": 0.9874, "step": 227 }, { "epoch": 0.01, "grad_norm": 18.242100728218194, "learning_rate": 4.8614072494669516e-06, "loss": 0.9728, "step": 228 }, { "epoch": 0.01, "grad_norm": 39.70679547881365, "learning_rate": 4.882729211087421e-06, "loss": 0.9471, "step": 229 }, { "epoch": 0.01, "grad_norm": 2.659584620471819, "learning_rate": 4.90405117270789e-06, "loss": 0.8661, "step": 230 }, { "epoch": 0.01, "grad_norm": 3.473419578898214, "learning_rate": 4.925373134328359e-06, "loss": 0.8378, "step": 231 }, { "epoch": 0.01, "grad_norm": 2.3851995726893107, "learning_rate": 4.946695095948828e-06, "loss": 0.8453, "step": 232 }, { "epoch": 0.01, "grad_norm": 3.1174192846680286, "learning_rate": 4.968017057569297e-06, "loss": 0.8727, "step": 233 }, { "epoch": 0.01, "grad_norm": 5.495234996961923, "learning_rate": 4.989339019189766e-06, "loss": 0.9104, "step": 234 }, { "epoch": 0.02, "grad_norm": 2.81931490860964, "learning_rate": 5.010660980810235e-06, "loss": 0.8834, "step": 235 }, { "epoch": 0.02, "grad_norm": 3.0654428256703787, "learning_rate": 5.031982942430704e-06, "loss": 0.834, "step": 236 }, { "epoch": 0.02, "grad_norm": 6.300595227773571, "learning_rate": 5.053304904051173e-06, "loss": 0.8371, "step": 237 }, { "epoch": 0.02, "grad_norm": 4.4497792231638815, "learning_rate": 5.074626865671642e-06, "loss": 0.8271, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.2555359833699113, "learning_rate": 5.095948827292111e-06, "loss": 0.6154, "step": 239 }, { "epoch": 0.02, "grad_norm": 4.414995700606566, "learning_rate": 5.11727078891258e-06, "loss": 0.8806, "step": 240 }, { "epoch": 0.02, "grad_norm": 4.8591058831295735, "learning_rate": 5.138592750533049e-06, "loss": 0.8779, "step": 241 }, { "epoch": 0.02, "grad_norm": 2.836788270696881, "learning_rate": 5.159914712153518e-06, "loss": 0.9146, "step": 242 }, { "epoch": 0.02, "grad_norm": 2.21765358965867, "learning_rate": 5.181236673773987e-06, "loss": 0.8593, "step": 243 }, { "epoch": 0.02, "grad_norm": 4.799942542737662, "learning_rate": 5.202558635394456e-06, "loss": 0.8368, "step": 244 }, { "epoch": 0.02, "grad_norm": 4.1903297139670705, "learning_rate": 5.2238805970149255e-06, "loss": 0.9709, "step": 245 }, { "epoch": 0.02, "grad_norm": 4.001150312766646, "learning_rate": 5.245202558635395e-06, "loss": 0.9355, "step": 246 }, { "epoch": 0.02, "grad_norm": 5.600593861904971, "learning_rate": 5.2665245202558636e-06, "loss": 0.9622, "step": 247 }, { "epoch": 0.02, "grad_norm": 3.058088397462161, "learning_rate": 5.2878464818763335e-06, "loss": 0.9713, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.5253384000923302, "learning_rate": 5.309168443496802e-06, "loss": 0.7066, "step": 249 }, { "epoch": 0.02, "grad_norm": 3.227929218068403, "learning_rate": 5.3304904051172716e-06, "loss": 0.9388, "step": 250 }, { "epoch": 0.02, "grad_norm": 4.077564588857529, "learning_rate": 5.351812366737741e-06, "loss": 0.8905, "step": 251 }, { "epoch": 0.02, "grad_norm": 2.333640171175523, "learning_rate": 5.37313432835821e-06, "loss": 0.8153, "step": 252 }, { "epoch": 0.02, "grad_norm": 2.2498469072970413, "learning_rate": 5.394456289978679e-06, "loss": 0.8982, "step": 253 }, { "epoch": 0.02, "grad_norm": 4.874743860799519, "learning_rate": 5.415778251599148e-06, "loss": 0.9627, "step": 254 }, { "epoch": 0.02, "grad_norm": 1.84467699019749, "learning_rate": 5.437100213219617e-06, "loss": 0.8895, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.226547387457045, "learning_rate": 5.458422174840086e-06, "loss": 0.704, "step": 256 }, { "epoch": 0.02, "grad_norm": 2.8173247952568747, "learning_rate": 5.479744136460555e-06, "loss": 0.9292, "step": 257 }, { "epoch": 0.02, "grad_norm": 3.2757382622943108, "learning_rate": 5.501066098081024e-06, "loss": 0.841, "step": 258 }, { "epoch": 0.02, "grad_norm": 15.410412011883917, "learning_rate": 5.522388059701493e-06, "loss": 0.9712, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.7089933155844648, "learning_rate": 5.543710021321962e-06, "loss": 0.8854, "step": 260 }, { "epoch": 0.02, "grad_norm": 3.412284156441837, "learning_rate": 5.565031982942431e-06, "loss": 0.9947, "step": 261 }, { "epoch": 0.02, "grad_norm": 1.9353874666259818, "learning_rate": 5.5863539445629e-06, "loss": 0.8657, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.8096893931450064, "learning_rate": 5.607675906183369e-06, "loss": 0.9184, "step": 263 }, { "epoch": 0.02, "grad_norm": 2.1357752487401997, "learning_rate": 5.628997867803838e-06, "loss": 0.8681, "step": 264 }, { "epoch": 0.02, "grad_norm": 1.5952226113498904, "learning_rate": 5.650319829424308e-06, "loss": 0.899, "step": 265 }, { "epoch": 0.02, "grad_norm": 3.0912893841245146, "learning_rate": 5.671641791044776e-06, "loss": 0.9462, "step": 266 }, { "epoch": 0.02, "grad_norm": 2.642408691566095, "learning_rate": 5.692963752665246e-06, "loss": 0.846, "step": 267 }, { "epoch": 0.02, "grad_norm": 2.681828970346619, "learning_rate": 5.7142857142857145e-06, "loss": 0.9014, "step": 268 }, { "epoch": 0.02, "grad_norm": 2.4966941793918958, "learning_rate": 5.735607675906184e-06, "loss": 0.8968, "step": 269 }, { "epoch": 0.02, "grad_norm": 1.803203061647286, "learning_rate": 5.756929637526653e-06, "loss": 0.9556, "step": 270 }, { "epoch": 0.02, "grad_norm": 4.960309674246828, "learning_rate": 5.7782515991471225e-06, "loss": 0.9453, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.8950124862239401, "learning_rate": 5.799573560767591e-06, "loss": 0.8588, "step": 272 }, { "epoch": 0.02, "grad_norm": 2.309734641378667, "learning_rate": 5.820895522388061e-06, "loss": 0.8615, "step": 273 }, { "epoch": 0.02, "grad_norm": 2.6252379117110864, "learning_rate": 5.842217484008529e-06, "loss": 0.9691, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.6614819167839887, "learning_rate": 5.863539445628999e-06, "loss": 0.8685, "step": 275 }, { "epoch": 0.02, "grad_norm": 2.257587478816212, "learning_rate": 5.884861407249467e-06, "loss": 0.8664, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.8501074474516999, "learning_rate": 5.906183368869937e-06, "loss": 0.8819, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.3664294731371216, "learning_rate": 5.927505330490405e-06, "loss": 0.7205, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.5221958604481185, "learning_rate": 5.948827292110875e-06, "loss": 0.6109, "step": 279 }, { "epoch": 0.02, "grad_norm": 1.4632437127701583, "learning_rate": 5.970149253731343e-06, "loss": 0.6772, "step": 280 }, { "epoch": 0.02, "grad_norm": 3.097174685642828, "learning_rate": 5.991471215351813e-06, "loss": 0.8594, "step": 281 }, { "epoch": 0.02, "grad_norm": 2.7536769419897675, "learning_rate": 6.012793176972282e-06, "loss": 0.8674, "step": 282 }, { "epoch": 0.02, "grad_norm": 2.007679145617383, "learning_rate": 6.034115138592751e-06, "loss": 0.8603, "step": 283 }, { "epoch": 0.02, "grad_norm": 2.039087114631518, "learning_rate": 6.055437100213221e-06, "loss": 0.8644, "step": 284 }, { "epoch": 0.02, "grad_norm": 2.0403121562450663, "learning_rate": 6.076759061833689e-06, "loss": 1.2057, "step": 285 }, { "epoch": 0.02, "grad_norm": 16.117072522148444, "learning_rate": 6.098081023454159e-06, "loss": 0.8452, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.92736465141114, "learning_rate": 6.119402985074627e-06, "loss": 0.9094, "step": 287 }, { "epoch": 0.02, "grad_norm": 1.8889502616939113, "learning_rate": 6.140724946695097e-06, "loss": 1.0234, "step": 288 }, { "epoch": 0.02, "grad_norm": 2.4310846362130674, "learning_rate": 6.1620469083155655e-06, "loss": 0.8714, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.8378942062358419, "learning_rate": 6.183368869936035e-06, "loss": 0.9069, "step": 290 }, { "epoch": 0.02, "grad_norm": 2.069108703299038, "learning_rate": 6.2046908315565036e-06, "loss": 0.8191, "step": 291 }, { "epoch": 0.02, "grad_norm": 2.052353190932862, "learning_rate": 6.2260127931769735e-06, "loss": 0.8982, "step": 292 }, { "epoch": 0.02, "grad_norm": 3.4579132308441967, "learning_rate": 6.247334754797442e-06, "loss": 0.9341, "step": 293 }, { "epoch": 0.02, "grad_norm": 2.0971510623187606, "learning_rate": 6.2686567164179116e-06, "loss": 0.9128, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.7061333184471663, "learning_rate": 6.28997867803838e-06, "loss": 0.8087, "step": 295 }, { "epoch": 0.02, "grad_norm": 2.8319907006012293, "learning_rate": 6.31130063965885e-06, "loss": 0.9035, "step": 296 }, { "epoch": 0.02, "grad_norm": 2.050579330337886, "learning_rate": 6.332622601279318e-06, "loss": 0.8502, "step": 297 }, { "epoch": 0.02, "grad_norm": 2.1751350851556945, "learning_rate": 6.353944562899788e-06, "loss": 0.9123, "step": 298 }, { "epoch": 0.02, "grad_norm": 2.2762866899369674, "learning_rate": 6.375266524520256e-06, "loss": 0.8049, "step": 299 }, { "epoch": 0.02, "grad_norm": 1.2589720824410566, "learning_rate": 6.396588486140726e-06, "loss": 0.6135, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.958444662200308, "learning_rate": 6.417910447761194e-06, "loss": 0.8796, "step": 301 }, { "epoch": 0.02, "grad_norm": 2.5208773937283446, "learning_rate": 6.439232409381664e-06, "loss": 0.9727, "step": 302 }, { "epoch": 0.02, "grad_norm": 1.472316083317676, "learning_rate": 6.460554371002132e-06, "loss": 0.585, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.7591347116231222, "learning_rate": 6.481876332622602e-06, "loss": 0.9572, "step": 304 }, { "epoch": 0.02, "grad_norm": 2.7887962958778667, "learning_rate": 6.50319829424307e-06, "loss": 0.8483, "step": 305 }, { "epoch": 0.02, "grad_norm": 2.2632032865315046, "learning_rate": 6.52452025586354e-06, "loss": 0.9081, "step": 306 }, { "epoch": 0.02, "grad_norm": 1.952031413414524, "learning_rate": 6.545842217484008e-06, "loss": 0.9017, "step": 307 }, { "epoch": 0.02, "grad_norm": 5.611162848742265, "learning_rate": 6.567164179104478e-06, "loss": 0.9236, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.8531930238795034, "learning_rate": 6.5884861407249465e-06, "loss": 0.8494, "step": 309 }, { "epoch": 0.02, "grad_norm": 2.302235267888257, "learning_rate": 6.609808102345416e-06, "loss": 0.7878, "step": 310 }, { "epoch": 0.02, "grad_norm": 1.9946339232261425, "learning_rate": 6.631130063965885e-06, "loss": 0.8857, "step": 311 }, { "epoch": 0.02, "grad_norm": 2.9314011245587968, "learning_rate": 6.6524520255863545e-06, "loss": 0.9135, "step": 312 }, { "epoch": 0.02, "grad_norm": 2.385388358973983, "learning_rate": 6.673773987206824e-06, "loss": 0.9223, "step": 313 }, { "epoch": 0.02, "grad_norm": 1.8080119696676618, "learning_rate": 6.695095948827293e-06, "loss": 0.9385, "step": 314 }, { "epoch": 0.02, "grad_norm": 2.6244038733968353, "learning_rate": 6.7164179104477625e-06, "loss": 0.9454, "step": 315 }, { "epoch": 0.02, "grad_norm": 2.0666388095361574, "learning_rate": 6.737739872068231e-06, "loss": 0.8616, "step": 316 }, { "epoch": 0.02, "grad_norm": 1.7101600926318348, "learning_rate": 6.759061833688701e-06, "loss": 0.8521, "step": 317 }, { "epoch": 0.02, "grad_norm": 1.650219095300853, "learning_rate": 6.780383795309169e-06, "loss": 0.9543, "step": 318 }, { "epoch": 0.02, "grad_norm": 1.4384637347241793, "learning_rate": 6.801705756929639e-06, "loss": 0.6199, "step": 319 }, { "epoch": 0.02, "grad_norm": 1.7145102200076903, "learning_rate": 6.823027718550107e-06, "loss": 0.8699, "step": 320 }, { "epoch": 0.02, "grad_norm": 1.7542087777991697, "learning_rate": 6.844349680170577e-06, "loss": 0.8623, "step": 321 }, { "epoch": 0.02, "grad_norm": 1.9464633168660976, "learning_rate": 6.865671641791045e-06, "loss": 0.8817, "step": 322 }, { "epoch": 0.02, "grad_norm": 2.000888678794106, "learning_rate": 6.886993603411515e-06, "loss": 0.8863, "step": 323 }, { "epoch": 0.02, "grad_norm": 1.8438051293462372, "learning_rate": 6.908315565031983e-06, "loss": 0.8553, "step": 324 }, { "epoch": 0.02, "grad_norm": 2.2634544494168765, "learning_rate": 6.929637526652453e-06, "loss": 0.9786, "step": 325 }, { "epoch": 0.02, "grad_norm": 1.9732666667653587, "learning_rate": 6.950959488272921e-06, "loss": 0.9589, "step": 326 }, { "epoch": 0.02, "grad_norm": 2.800471046216165, "learning_rate": 6.972281449893391e-06, "loss": 0.9324, "step": 327 }, { "epoch": 0.02, "grad_norm": 1.8288517043510857, "learning_rate": 6.993603411513859e-06, "loss": 0.977, "step": 328 }, { "epoch": 0.02, "grad_norm": 2.1642112333094374, "learning_rate": 7.014925373134329e-06, "loss": 0.9412, "step": 329 }, { "epoch": 0.02, "grad_norm": 1.7414904513139409, "learning_rate": 7.0362473347547975e-06, "loss": 0.9222, "step": 330 }, { "epoch": 0.02, "grad_norm": 1.8642266779882526, "learning_rate": 7.057569296375267e-06, "loss": 0.8962, "step": 331 }, { "epoch": 0.02, "grad_norm": 1.844767880797686, "learning_rate": 7.0788912579957356e-06, "loss": 0.8901, "step": 332 }, { "epoch": 0.02, "grad_norm": 1.8062177038918192, "learning_rate": 7.1002132196162055e-06, "loss": 0.7942, "step": 333 }, { "epoch": 0.02, "grad_norm": 1.8562978209154368, "learning_rate": 7.121535181236674e-06, "loss": 0.8558, "step": 334 }, { "epoch": 0.02, "grad_norm": 1.6507822814459414, "learning_rate": 7.1428571428571436e-06, "loss": 0.8096, "step": 335 }, { "epoch": 0.02, "grad_norm": 2.147178320265339, "learning_rate": 7.164179104477612e-06, "loss": 0.9279, "step": 336 }, { "epoch": 0.02, "grad_norm": 1.737188054140506, "learning_rate": 7.185501066098082e-06, "loss": 0.8668, "step": 337 }, { "epoch": 0.02, "grad_norm": 1.8858140259829506, "learning_rate": 7.20682302771855e-06, "loss": 0.867, "step": 338 }, { "epoch": 0.02, "grad_norm": 1.6703231707952202, "learning_rate": 7.22814498933902e-06, "loss": 0.9115, "step": 339 }, { "epoch": 0.02, "grad_norm": 1.9643676368027492, "learning_rate": 7.249466950959488e-06, "loss": 0.9003, "step": 340 }, { "epoch": 0.02, "grad_norm": 1.8146629492130966, "learning_rate": 7.270788912579958e-06, "loss": 0.8881, "step": 341 }, { "epoch": 0.02, "grad_norm": 2.135108367637657, "learning_rate": 7.292110874200427e-06, "loss": 0.8453, "step": 342 }, { "epoch": 0.02, "grad_norm": 1.9493505651801444, "learning_rate": 7.313432835820896e-06, "loss": 0.8396, "step": 343 }, { "epoch": 0.02, "grad_norm": 2.3043845066823847, "learning_rate": 7.334754797441366e-06, "loss": 0.8598, "step": 344 }, { "epoch": 0.02, "grad_norm": 2.0206918008470702, "learning_rate": 7.356076759061834e-06, "loss": 0.8648, "step": 345 }, { "epoch": 0.02, "grad_norm": 2.0012119448189707, "learning_rate": 7.377398720682304e-06, "loss": 0.8508, "step": 346 }, { "epoch": 0.02, "grad_norm": 2.412047360081755, "learning_rate": 7.398720682302772e-06, "loss": 0.843, "step": 347 }, { "epoch": 0.02, "grad_norm": 1.8067845837555794, "learning_rate": 7.420042643923242e-06, "loss": 0.9646, "step": 348 }, { "epoch": 0.02, "grad_norm": 1.7353550306549994, "learning_rate": 7.44136460554371e-06, "loss": 0.8618, "step": 349 }, { "epoch": 0.02, "grad_norm": 2.212495927755603, "learning_rate": 7.46268656716418e-06, "loss": 0.9187, "step": 350 }, { "epoch": 0.02, "grad_norm": 2.3624326901996673, "learning_rate": 7.484008528784648e-06, "loss": 0.9069, "step": 351 }, { "epoch": 0.02, "grad_norm": 1.6512791789535441, "learning_rate": 7.505330490405118e-06, "loss": 0.8632, "step": 352 }, { "epoch": 0.02, "grad_norm": 1.4347235764017268, "learning_rate": 7.5266524520255865e-06, "loss": 0.7579, "step": 353 }, { "epoch": 0.02, "grad_norm": 2.2941765307586452, "learning_rate": 7.547974413646056e-06, "loss": 0.9265, "step": 354 }, { "epoch": 0.02, "grad_norm": 2.037006887300922, "learning_rate": 7.569296375266525e-06, "loss": 0.8474, "step": 355 }, { "epoch": 0.02, "grad_norm": 1.9904080098885895, "learning_rate": 7.5906183368869945e-06, "loss": 0.8909, "step": 356 }, { "epoch": 0.02, "grad_norm": 1.7097166983280117, "learning_rate": 7.611940298507463e-06, "loss": 0.8635, "step": 357 }, { "epoch": 0.02, "grad_norm": 1.83198213969312, "learning_rate": 7.633262260127933e-06, "loss": 0.8486, "step": 358 }, { "epoch": 0.02, "grad_norm": 1.6047840668800617, "learning_rate": 7.654584221748402e-06, "loss": 0.8165, "step": 359 }, { "epoch": 0.02, "grad_norm": 2.0977530096460715, "learning_rate": 7.67590618336887e-06, "loss": 0.8824, "step": 360 }, { "epoch": 0.02, "grad_norm": 2.6104891224572624, "learning_rate": 7.69722814498934e-06, "loss": 0.8741, "step": 361 }, { "epoch": 0.02, "grad_norm": 3.5478450478118857, "learning_rate": 7.718550106609809e-06, "loss": 0.8948, "step": 362 }, { "epoch": 0.02, "grad_norm": 1.7323780854116315, "learning_rate": 7.739872068230278e-06, "loss": 0.7738, "step": 363 }, { "epoch": 0.02, "grad_norm": 1.3384156252285553, "learning_rate": 7.761194029850747e-06, "loss": 0.6574, "step": 364 }, { "epoch": 0.02, "grad_norm": 1.6356183496291847, "learning_rate": 7.782515991471216e-06, "loss": 0.7822, "step": 365 }, { "epoch": 0.02, "grad_norm": 1.6447601310949893, "learning_rate": 7.803837953091685e-06, "loss": 0.7804, "step": 366 }, { "epoch": 0.02, "grad_norm": 1.8382866775743816, "learning_rate": 7.825159914712154e-06, "loss": 0.8784, "step": 367 }, { "epoch": 0.02, "grad_norm": 1.7248168208525498, "learning_rate": 7.846481876332623e-06, "loss": 0.8155, "step": 368 }, { "epoch": 0.02, "grad_norm": 2.255480650586059, "learning_rate": 7.867803837953092e-06, "loss": 0.8466, "step": 369 }, { "epoch": 0.02, "grad_norm": 1.316050968960616, "learning_rate": 7.889125799573561e-06, "loss": 0.7104, "step": 370 }, { "epoch": 0.02, "grad_norm": 1.694202641402023, "learning_rate": 7.91044776119403e-06, "loss": 0.7895, "step": 371 }, { "epoch": 0.02, "grad_norm": 1.617151014770389, "learning_rate": 7.9317697228145e-06, "loss": 0.7623, "step": 372 }, { "epoch": 0.02, "grad_norm": 2.617188841968736, "learning_rate": 7.953091684434968e-06, "loss": 0.8162, "step": 373 }, { "epoch": 0.02, "grad_norm": 1.8639550932676956, "learning_rate": 7.974413646055437e-06, "loss": 0.8899, "step": 374 }, { "epoch": 0.02, "grad_norm": 1.7704786712948855, "learning_rate": 7.995735607675907e-06, "loss": 0.8618, "step": 375 }, { "epoch": 0.02, "grad_norm": 1.6211318591911723, "learning_rate": 8.017057569296376e-06, "loss": 0.9013, "step": 376 }, { "epoch": 0.02, "grad_norm": 1.88992483718311, "learning_rate": 8.038379530916846e-06, "loss": 0.7212, "step": 377 }, { "epoch": 0.02, "grad_norm": 1.5073298018027965, "learning_rate": 8.059701492537314e-06, "loss": 0.6692, "step": 378 }, { "epoch": 0.02, "grad_norm": 1.6982848099285341, "learning_rate": 8.081023454157784e-06, "loss": 0.8215, "step": 379 }, { "epoch": 0.02, "grad_norm": 1.9900730268427413, "learning_rate": 8.102345415778252e-06, "loss": 0.853, "step": 380 }, { "epoch": 0.02, "grad_norm": 1.9871810999993413, "learning_rate": 8.123667377398723e-06, "loss": 0.9328, "step": 381 }, { "epoch": 0.02, "grad_norm": 3.1841145132951203, "learning_rate": 8.14498933901919e-06, "loss": 0.873, "step": 382 }, { "epoch": 0.02, "grad_norm": 1.828789084695413, "learning_rate": 8.16631130063966e-06, "loss": 0.8091, "step": 383 }, { "epoch": 0.02, "grad_norm": 1.5169881386770319, "learning_rate": 8.187633262260128e-06, "loss": 0.6345, "step": 384 }, { "epoch": 0.02, "grad_norm": 1.950140863686309, "learning_rate": 8.208955223880599e-06, "loss": 0.8024, "step": 385 }, { "epoch": 0.02, "grad_norm": 2.0757942775632503, "learning_rate": 8.230277185501066e-06, "loss": 0.8738, "step": 386 }, { "epoch": 0.02, "grad_norm": 2.0063342492363154, "learning_rate": 8.251599147121537e-06, "loss": 0.8814, "step": 387 }, { "epoch": 0.02, "grad_norm": 1.6944838999885934, "learning_rate": 8.272921108742004e-06, "loss": 0.919, "step": 388 }, { "epoch": 0.02, "grad_norm": 1.6860109401995769, "learning_rate": 8.294243070362475e-06, "loss": 0.8043, "step": 389 }, { "epoch": 0.02, "grad_norm": 2.398658440620031, "learning_rate": 8.315565031982942e-06, "loss": 0.9234, "step": 390 }, { "epoch": 0.03, "grad_norm": 1.300219214795183, "learning_rate": 8.336886993603413e-06, "loss": 0.6116, "step": 391 }, { "epoch": 0.03, "grad_norm": 1.7780396586235412, "learning_rate": 8.35820895522388e-06, "loss": 0.9241, "step": 392 }, { "epoch": 0.03, "grad_norm": 1.8925743000033761, "learning_rate": 8.379530916844351e-06, "loss": 0.9086, "step": 393 }, { "epoch": 0.03, "grad_norm": 1.946021463358611, "learning_rate": 8.400852878464819e-06, "loss": 0.871, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.1977329340188387, "learning_rate": 8.42217484008529e-06, "loss": 0.7649, "step": 395 }, { "epoch": 0.03, "grad_norm": 1.337751181151636, "learning_rate": 8.443496801705757e-06, "loss": 0.5912, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.7903586936738265, "learning_rate": 8.464818763326227e-06, "loss": 0.7746, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.6447397539368334, "learning_rate": 8.486140724946695e-06, "loss": 0.6828, "step": 398 }, { "epoch": 0.03, "grad_norm": 1.9996991819890837, "learning_rate": 8.507462686567165e-06, "loss": 0.8848, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.6334095861130147, "learning_rate": 8.528784648187633e-06, "loss": 0.87, "step": 400 }, { "epoch": 0.03, "grad_norm": 1.6105065438912325, "learning_rate": 8.550106609808104e-06, "loss": 0.8029, "step": 401 }, { "epoch": 0.03, "grad_norm": 3.130688613185073, "learning_rate": 8.571428571428571e-06, "loss": 0.8712, "step": 402 }, { "epoch": 0.03, "grad_norm": 2.088796964783549, "learning_rate": 8.592750533049042e-06, "loss": 0.9191, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.279968576900911, "learning_rate": 8.614072494669509e-06, "loss": 0.6768, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.6258726233684464, "learning_rate": 8.63539445628998e-06, "loss": 0.7694, "step": 405 }, { "epoch": 0.03, "grad_norm": 3.0815687555934645, "learning_rate": 8.656716417910447e-06, "loss": 0.8068, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.662559777076319, "learning_rate": 8.678038379530918e-06, "loss": 0.7789, "step": 407 }, { "epoch": 0.03, "grad_norm": 1.8247439693856349, "learning_rate": 8.699360341151387e-06, "loss": 0.864, "step": 408 }, { "epoch": 0.03, "grad_norm": 2.047667380986838, "learning_rate": 8.720682302771856e-06, "loss": 0.8963, "step": 409 }, { "epoch": 0.03, "grad_norm": 1.8802990853189707, "learning_rate": 8.742004264392325e-06, "loss": 0.8083, "step": 410 }, { "epoch": 0.03, "grad_norm": 1.9550105155585529, "learning_rate": 8.763326226012794e-06, "loss": 0.8532, "step": 411 }, { "epoch": 0.03, "grad_norm": 2.8679142565257703, "learning_rate": 8.784648187633263e-06, "loss": 0.9066, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.1797167025321755, "learning_rate": 8.805970149253732e-06, "loss": 0.7514, "step": 413 }, { "epoch": 0.03, "grad_norm": 2.0394909769453116, "learning_rate": 8.827292110874201e-06, "loss": 0.9312, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.6158867893962947, "learning_rate": 8.84861407249467e-06, "loss": 0.6567, "step": 415 }, { "epoch": 0.03, "grad_norm": 2.1181124963076208, "learning_rate": 8.86993603411514e-06, "loss": 0.7787, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.9801479759794522, "learning_rate": 8.891257995735608e-06, "loss": 0.7776, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.9151763049640556, "learning_rate": 8.912579957356077e-06, "loss": 0.919, "step": 418 }, { "epoch": 0.03, "grad_norm": 1.9097008242736957, "learning_rate": 8.933901918976547e-06, "loss": 0.8209, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.2030691447843767, "learning_rate": 8.955223880597016e-06, "loss": 0.6793, "step": 420 }, { "epoch": 0.03, "grad_norm": 2.041456857301367, "learning_rate": 8.976545842217485e-06, "loss": 0.8259, "step": 421 }, { "epoch": 0.03, "grad_norm": 2.223368510820626, "learning_rate": 8.997867803837954e-06, "loss": 0.8604, "step": 422 }, { "epoch": 0.03, "grad_norm": 2.6062496985783934, "learning_rate": 9.019189765458423e-06, "loss": 0.824, "step": 423 }, { "epoch": 0.03, "grad_norm": 1.9755211391218666, "learning_rate": 9.040511727078892e-06, "loss": 0.9203, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.4423395766478007, "learning_rate": 9.06183368869936e-06, "loss": 0.736, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.8838148767285907, "learning_rate": 9.08315565031983e-06, "loss": 0.9238, "step": 426 }, { "epoch": 0.03, "grad_norm": 2.0657715392156835, "learning_rate": 9.104477611940299e-06, "loss": 0.894, "step": 427 }, { "epoch": 0.03, "grad_norm": 1.8643937365377221, "learning_rate": 9.125799573560768e-06, "loss": 0.9142, "step": 428 }, { "epoch": 0.03, "grad_norm": 2.502708139871198, "learning_rate": 9.147121535181237e-06, "loss": 0.8598, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.6683263478944121, "learning_rate": 9.168443496801706e-06, "loss": 0.9061, "step": 430 }, { "epoch": 0.03, "grad_norm": 2.5307446277289363, "learning_rate": 9.189765458422175e-06, "loss": 0.8012, "step": 431 }, { "epoch": 0.03, "grad_norm": 1.910969825974653, "learning_rate": 9.211087420042644e-06, "loss": 0.803, "step": 432 }, { "epoch": 0.03, "grad_norm": 1.7365738860536795, "learning_rate": 9.232409381663113e-06, "loss": 0.811, "step": 433 }, { "epoch": 0.03, "grad_norm": 1.8908157041936797, "learning_rate": 9.253731343283582e-06, "loss": 0.8448, "step": 434 }, { "epoch": 0.03, "grad_norm": 1.7247323644565928, "learning_rate": 9.275053304904051e-06, "loss": 0.8911, "step": 435 }, { "epoch": 0.03, "grad_norm": 2.4653253223584346, "learning_rate": 9.29637526652452e-06, "loss": 0.878, "step": 436 }, { "epoch": 0.03, "grad_norm": 1.7258918540955956, "learning_rate": 9.31769722814499e-06, "loss": 0.8167, "step": 437 }, { "epoch": 0.03, "grad_norm": 1.6136753711469187, "learning_rate": 9.339019189765458e-06, "loss": 0.8249, "step": 438 }, { "epoch": 0.03, "grad_norm": 1.9665496682168528, "learning_rate": 9.36034115138593e-06, "loss": 0.9266, "step": 439 }, { "epoch": 0.03, "grad_norm": 1.7241020783969445, "learning_rate": 9.381663113006397e-06, "loss": 0.8613, "step": 440 }, { "epoch": 0.03, "grad_norm": 1.68943050020209, "learning_rate": 9.402985074626867e-06, "loss": 0.8405, "step": 441 }, { "epoch": 0.03, "grad_norm": 1.7817287722366635, "learning_rate": 9.424307036247335e-06, "loss": 0.9395, "step": 442 }, { "epoch": 0.03, "grad_norm": 1.7705251385402696, "learning_rate": 9.445628997867805e-06, "loss": 0.8514, "step": 443 }, { "epoch": 0.03, "grad_norm": 2.397847350473223, "learning_rate": 9.466950959488273e-06, "loss": 0.9511, "step": 444 }, { "epoch": 0.03, "grad_norm": 1.80101329429799, "learning_rate": 9.488272921108744e-06, "loss": 0.8851, "step": 445 }, { "epoch": 0.03, "grad_norm": 1.8960168773736337, "learning_rate": 9.509594882729211e-06, "loss": 0.8506, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.8109491024004636, "learning_rate": 9.530916844349682e-06, "loss": 0.8367, "step": 447 }, { "epoch": 0.03, "grad_norm": 2.0227912927442833, "learning_rate": 9.552238805970149e-06, "loss": 0.8246, "step": 448 }, { "epoch": 0.03, "grad_norm": 1.7092986048847902, "learning_rate": 9.57356076759062e-06, "loss": 0.8316, "step": 449 }, { "epoch": 0.03, "grad_norm": 3.577314703810548, "learning_rate": 9.594882729211089e-06, "loss": 0.8073, "step": 450 }, { "epoch": 0.03, "grad_norm": 2.16189902933013, "learning_rate": 9.616204690831558e-06, "loss": 0.8599, "step": 451 }, { "epoch": 0.03, "grad_norm": 1.8139370429233008, "learning_rate": 9.637526652452027e-06, "loss": 0.9018, "step": 452 }, { "epoch": 0.03, "grad_norm": 1.7897752614929137, "learning_rate": 9.658848614072496e-06, "loss": 1.0197, "step": 453 }, { "epoch": 0.03, "grad_norm": 2.1262851596566823, "learning_rate": 9.680170575692965e-06, "loss": 0.8073, "step": 454 }, { "epoch": 0.03, "grad_norm": 2.3053448687372127, "learning_rate": 9.701492537313434e-06, "loss": 0.7773, "step": 455 }, { "epoch": 0.03, "grad_norm": 2.17728220405043, "learning_rate": 9.722814498933903e-06, "loss": 0.8688, "step": 456 }, { "epoch": 0.03, "grad_norm": 2.046857427615329, "learning_rate": 9.744136460554372e-06, "loss": 0.8111, "step": 457 }, { "epoch": 0.03, "grad_norm": 2.120419093028288, "learning_rate": 9.765458422174841e-06, "loss": 0.8678, "step": 458 }, { "epoch": 0.03, "grad_norm": 1.7930508349416632, "learning_rate": 9.78678038379531e-06, "loss": 0.8515, "step": 459 }, { "epoch": 0.03, "grad_norm": 1.9280642769396965, "learning_rate": 9.80810234541578e-06, "loss": 0.9375, "step": 460 }, { "epoch": 0.03, "grad_norm": 2.2198398580167122, "learning_rate": 9.829424307036248e-06, "loss": 0.9155, "step": 461 }, { "epoch": 0.03, "grad_norm": 1.4725635031565742, "learning_rate": 9.850746268656717e-06, "loss": 0.6824, "step": 462 }, { "epoch": 0.03, "grad_norm": 2.204993440341403, "learning_rate": 9.872068230277187e-06, "loss": 0.7977, "step": 463 }, { "epoch": 0.03, "grad_norm": 1.41499129275883, "learning_rate": 9.893390191897656e-06, "loss": 0.6219, "step": 464 }, { "epoch": 0.03, "grad_norm": 1.7741266318397364, "learning_rate": 9.914712153518125e-06, "loss": 0.8424, "step": 465 }, { "epoch": 0.03, "grad_norm": 1.8604304972618413, "learning_rate": 9.936034115138594e-06, "loss": 0.7733, "step": 466 }, { "epoch": 0.03, "grad_norm": 2.255793729175227, "learning_rate": 9.957356076759063e-06, "loss": 0.8521, "step": 467 }, { "epoch": 0.03, "grad_norm": 1.8131185529593563, "learning_rate": 9.978678038379532e-06, "loss": 0.7913, "step": 468 }, { "epoch": 0.03, "grad_norm": 1.6665732056021707, "learning_rate": 1e-05, "loss": 0.8175, "step": 469 }, { "epoch": 0.03, "grad_norm": 1.8621235240396543, "learning_rate": 9.999999892555254e-06, "loss": 0.9104, "step": 470 }, { "epoch": 0.03, "grad_norm": 1.8764373881515006, "learning_rate": 9.999999570221018e-06, "loss": 0.8901, "step": 471 }, { "epoch": 0.03, "grad_norm": 2.148429261988391, "learning_rate": 9.999999032997307e-06, "loss": 0.7849, "step": 472 }, { "epoch": 0.03, "grad_norm": 2.177903862357693, "learning_rate": 9.999998280884144e-06, "loss": 0.9335, "step": 473 }, { "epoch": 0.03, "grad_norm": 1.703986899149118, "learning_rate": 9.999997313881561e-06, "loss": 0.826, "step": 474 }, { "epoch": 0.03, "grad_norm": 1.8295287300120082, "learning_rate": 9.999996131989602e-06, "loss": 0.8852, "step": 475 }, { "epoch": 0.03, "grad_norm": 1.3030534311159094, "learning_rate": 9.999994735208314e-06, "loss": 0.6913, "step": 476 }, { "epoch": 0.03, "grad_norm": 1.7582169919060844, "learning_rate": 9.99999312353776e-06, "loss": 0.9168, "step": 477 }, { "epoch": 0.03, "grad_norm": 1.8950297660829105, "learning_rate": 9.999991296978006e-06, "loss": 0.9198, "step": 478 }, { "epoch": 0.03, "grad_norm": 1.9582989232978774, "learning_rate": 9.999989255529133e-06, "loss": 0.7534, "step": 479 }, { "epoch": 0.03, "grad_norm": 1.7065616227545262, "learning_rate": 9.99998699919123e-06, "loss": 0.7916, "step": 480 }, { "epoch": 0.03, "grad_norm": 1.844952352399886, "learning_rate": 9.99998452796439e-06, "loss": 0.8362, "step": 481 }, { "epoch": 0.03, "grad_norm": 1.7835055523402152, "learning_rate": 9.99998184184872e-06, "loss": 0.836, "step": 482 }, { "epoch": 0.03, "grad_norm": 1.8376658600684752, "learning_rate": 9.99997894084434e-06, "loss": 0.7558, "step": 483 }, { "epoch": 0.03, "grad_norm": 1.5808150452731748, "learning_rate": 9.999975824951372e-06, "loss": 0.811, "step": 484 }, { "epoch": 0.03, "grad_norm": 1.6287986270606079, "learning_rate": 9.999972494169947e-06, "loss": 0.8239, "step": 485 }, { "epoch": 0.03, "grad_norm": 1.8137706906472935, "learning_rate": 9.999968948500211e-06, "loss": 0.9027, "step": 486 }, { "epoch": 0.03, "grad_norm": 1.8463767545323957, "learning_rate": 9.999965187942317e-06, "loss": 0.8565, "step": 487 }, { "epoch": 0.03, "grad_norm": 1.384652545933076, "learning_rate": 9.999961212496425e-06, "loss": 0.7348, "step": 488 }, { "epoch": 0.03, "grad_norm": 2.0044452285204097, "learning_rate": 9.999957022162707e-06, "loss": 0.8558, "step": 489 }, { "epoch": 0.03, "grad_norm": 1.9891943831503147, "learning_rate": 9.999952616941342e-06, "loss": 0.8683, "step": 490 }, { "epoch": 0.03, "grad_norm": 2.0910919724743016, "learning_rate": 9.99994799683252e-06, "loss": 0.907, "step": 491 }, { "epoch": 0.03, "grad_norm": 1.8504805795402162, "learning_rate": 9.999943161836439e-06, "loss": 0.8946, "step": 492 }, { "epoch": 0.03, "grad_norm": 1.6471571414412958, "learning_rate": 9.999938111953306e-06, "loss": 0.8002, "step": 493 }, { "epoch": 0.03, "grad_norm": 1.8606429893627512, "learning_rate": 9.999932847183343e-06, "loss": 0.8708, "step": 494 }, { "epoch": 0.03, "grad_norm": 2.016361517358938, "learning_rate": 9.99992736752677e-06, "loss": 0.9001, "step": 495 }, { "epoch": 0.03, "grad_norm": 1.4801771288673897, "learning_rate": 9.999921672983826e-06, "loss": 0.6852, "step": 496 }, { "epoch": 0.03, "grad_norm": 1.9361294302377774, "learning_rate": 9.999915763554754e-06, "loss": 0.8732, "step": 497 }, { "epoch": 0.03, "grad_norm": 1.5714684302519515, "learning_rate": 9.999909639239809e-06, "loss": 0.7585, "step": 498 }, { "epoch": 0.03, "grad_norm": 1.870661381990687, "learning_rate": 9.999903300039253e-06, "loss": 0.8773, "step": 499 }, { "epoch": 0.03, "grad_norm": 1.7550844699263655, "learning_rate": 9.999896745953361e-06, "loss": 0.8116, "step": 500 }, { "epoch": 0.03, "grad_norm": 1.930226651265555, "learning_rate": 9.999889976982413e-06, "loss": 0.8269, "step": 501 }, { "epoch": 0.03, "grad_norm": 1.7509151507869878, "learning_rate": 9.9998829931267e-06, "loss": 0.7574, "step": 502 }, { "epoch": 0.03, "grad_norm": 1.6987436276358794, "learning_rate": 9.99987579438652e-06, "loss": 0.8792, "step": 503 }, { "epoch": 0.03, "grad_norm": 1.8594026854117534, "learning_rate": 9.999868380762187e-06, "loss": 0.8476, "step": 504 }, { "epoch": 0.03, "grad_norm": 2.2068937199543566, "learning_rate": 9.999860752254016e-06, "loss": 0.9266, "step": 505 }, { "epoch": 0.03, "grad_norm": 1.593060868638754, "learning_rate": 9.999852908862337e-06, "loss": 0.8706, "step": 506 }, { "epoch": 0.03, "grad_norm": 1.7770893508838266, "learning_rate": 9.999844850587486e-06, "loss": 0.8175, "step": 507 }, { "epoch": 0.03, "grad_norm": 1.6018834157637256, "learning_rate": 9.999836577429808e-06, "loss": 0.8237, "step": 508 }, { "epoch": 0.03, "grad_norm": 1.3847976146545466, "learning_rate": 9.99982808938966e-06, "loss": 0.7022, "step": 509 }, { "epoch": 0.03, "grad_norm": 1.604049454636143, "learning_rate": 9.999819386467409e-06, "loss": 0.762, "step": 510 }, { "epoch": 0.03, "grad_norm": 1.8615010705027895, "learning_rate": 9.999810468663424e-06, "loss": 0.7387, "step": 511 }, { "epoch": 0.03, "grad_norm": 1.7977658960510894, "learning_rate": 9.999801335978095e-06, "loss": 0.9117, "step": 512 }, { "epoch": 0.03, "grad_norm": 2.005056940085138, "learning_rate": 9.999791988411807e-06, "loss": 0.86, "step": 513 }, { "epoch": 0.03, "grad_norm": 1.7721420404776658, "learning_rate": 9.999782425964968e-06, "loss": 0.8456, "step": 514 }, { "epoch": 0.03, "grad_norm": 1.4515968864541495, "learning_rate": 9.999772648637984e-06, "loss": 0.6862, "step": 515 }, { "epoch": 0.03, "grad_norm": 4.671216177215456, "learning_rate": 9.999762656431277e-06, "loss": 0.7488, "step": 516 }, { "epoch": 0.03, "grad_norm": 2.1229261414085916, "learning_rate": 9.999752449345279e-06, "loss": 0.8646, "step": 517 }, { "epoch": 0.03, "grad_norm": 2.262002350296626, "learning_rate": 9.999742027380426e-06, "loss": 0.9347, "step": 518 }, { "epoch": 0.03, "grad_norm": 1.8242682750528987, "learning_rate": 9.999731390537168e-06, "loss": 0.8079, "step": 519 }, { "epoch": 0.03, "grad_norm": 3.5974729499303497, "learning_rate": 9.999720538815959e-06, "loss": 0.8735, "step": 520 }, { "epoch": 0.03, "grad_norm": 1.356586738282217, "learning_rate": 9.999709472217268e-06, "loss": 0.7883, "step": 521 }, { "epoch": 0.03, "grad_norm": 1.9100617212780628, "learning_rate": 9.999698190741569e-06, "loss": 0.8379, "step": 522 }, { "epoch": 0.03, "grad_norm": 2.345380572427569, "learning_rate": 9.999686694389348e-06, "loss": 0.8028, "step": 523 }, { "epoch": 0.03, "grad_norm": 1.789415192519978, "learning_rate": 9.999674983161099e-06, "loss": 0.8005, "step": 524 }, { "epoch": 0.03, "grad_norm": 1.7471968340599748, "learning_rate": 9.999663057057324e-06, "loss": 0.7311, "step": 525 }, { "epoch": 0.03, "grad_norm": 1.8092501112329793, "learning_rate": 9.999650916078536e-06, "loss": 0.8529, "step": 526 }, { "epoch": 0.03, "grad_norm": 1.614822099033206, "learning_rate": 9.999638560225259e-06, "loss": 0.7954, "step": 527 }, { "epoch": 0.03, "grad_norm": 1.9837042139197512, "learning_rate": 9.999625989498022e-06, "loss": 0.8891, "step": 528 }, { "epoch": 0.03, "grad_norm": 2.3042381285804328, "learning_rate": 9.999613203897365e-06, "loss": 0.8817, "step": 529 }, { "epoch": 0.03, "grad_norm": 2.054773453107983, "learning_rate": 9.999600203423837e-06, "loss": 0.8379, "step": 530 }, { "epoch": 0.03, "grad_norm": 1.6760316315985677, "learning_rate": 9.999586988078e-06, "loss": 0.7992, "step": 531 }, { "epoch": 0.03, "grad_norm": 1.9944817130875572, "learning_rate": 9.99957355786042e-06, "loss": 0.9744, "step": 532 }, { "epoch": 0.03, "grad_norm": 1.7632099361975282, "learning_rate": 9.999559912771673e-06, "loss": 0.7225, "step": 533 }, { "epoch": 0.03, "grad_norm": 1.8294614180185786, "learning_rate": 9.999546052812347e-06, "loss": 0.7442, "step": 534 }, { "epoch": 0.03, "grad_norm": 1.9938752502293249, "learning_rate": 9.999531977983038e-06, "loss": 0.8424, "step": 535 }, { "epoch": 0.03, "grad_norm": 1.4635309131050436, "learning_rate": 9.999517688284348e-06, "loss": 0.5151, "step": 536 }, { "epoch": 0.03, "grad_norm": 1.7423892968175163, "learning_rate": 9.999503183716894e-06, "loss": 0.7274, "step": 537 }, { "epoch": 0.03, "grad_norm": 1.890152832321099, "learning_rate": 9.999488464281298e-06, "loss": 0.848, "step": 538 }, { "epoch": 0.03, "grad_norm": 1.6261132433487981, "learning_rate": 9.999473529978194e-06, "loss": 0.7559, "step": 539 }, { "epoch": 0.03, "grad_norm": 1.6835608002027882, "learning_rate": 9.999458380808222e-06, "loss": 0.8587, "step": 540 }, { "epoch": 0.03, "grad_norm": 1.8192642363669052, "learning_rate": 9.999443016772037e-06, "loss": 0.9232, "step": 541 }, { "epoch": 0.03, "grad_norm": 1.8699051529034165, "learning_rate": 9.999427437870292e-06, "loss": 0.8807, "step": 542 }, { "epoch": 0.03, "grad_norm": 1.339288370182032, "learning_rate": 9.999411644103665e-06, "loss": 0.6678, "step": 543 }, { "epoch": 0.03, "grad_norm": 1.9437793678545905, "learning_rate": 9.999395635472829e-06, "loss": 0.8062, "step": 544 }, { "epoch": 0.03, "grad_norm": 1.814871355676906, "learning_rate": 9.999379411978474e-06, "loss": 0.7815, "step": 545 }, { "epoch": 0.03, "grad_norm": 1.8341644604750336, "learning_rate": 9.999362973621297e-06, "loss": 0.9362, "step": 546 }, { "epoch": 0.04, "grad_norm": 1.786941468495319, "learning_rate": 9.999346320402003e-06, "loss": 0.9097, "step": 547 }, { "epoch": 0.04, "grad_norm": 1.693506600941689, "learning_rate": 9.999329452321312e-06, "loss": 0.8169, "step": 548 }, { "epoch": 0.04, "grad_norm": 1.8331733423990466, "learning_rate": 9.999312369379944e-06, "loss": 0.9162, "step": 549 }, { "epoch": 0.04, "grad_norm": 1.803451657025406, "learning_rate": 9.999295071578637e-06, "loss": 0.8123, "step": 550 }, { "epoch": 0.04, "grad_norm": 1.7192924259404225, "learning_rate": 9.99927755891813e-06, "loss": 0.7852, "step": 551 }, { "epoch": 0.04, "grad_norm": 1.7146083607495692, "learning_rate": 9.999259831399181e-06, "loss": 0.8561, "step": 552 }, { "epoch": 0.04, "grad_norm": 2.070082920306235, "learning_rate": 9.99924188902255e-06, "loss": 0.8678, "step": 553 }, { "epoch": 0.04, "grad_norm": 1.8528993837545285, "learning_rate": 9.999223731789006e-06, "loss": 0.7988, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.344559363202609, "learning_rate": 9.99920535969933e-06, "loss": 0.6651, "step": 555 }, { "epoch": 0.04, "grad_norm": 2.146832873201188, "learning_rate": 9.999186772754315e-06, "loss": 0.8714, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.9248052114285568, "learning_rate": 9.999167970954756e-06, "loss": 0.8507, "step": 557 }, { "epoch": 0.04, "grad_norm": 1.89525378452406, "learning_rate": 9.99914895430146e-06, "loss": 0.7798, "step": 558 }, { "epoch": 0.04, "grad_norm": 2.047174635263574, "learning_rate": 9.999129722795248e-06, "loss": 0.8714, "step": 559 }, { "epoch": 0.04, "grad_norm": 1.866338084601025, "learning_rate": 9.999110276436947e-06, "loss": 0.8511, "step": 560 }, { "epoch": 0.04, "grad_norm": 1.7376052182864725, "learning_rate": 9.999090615227389e-06, "loss": 0.886, "step": 561 }, { "epoch": 0.04, "grad_norm": 1.9060418137748234, "learning_rate": 9.999070739167423e-06, "loss": 0.8244, "step": 562 }, { "epoch": 0.04, "grad_norm": 1.6889365191874275, "learning_rate": 9.999050648257898e-06, "loss": 0.8473, "step": 563 }, { "epoch": 0.04, "grad_norm": 2.1882624290212025, "learning_rate": 9.999030342499682e-06, "loss": 0.7131, "step": 564 }, { "epoch": 0.04, "grad_norm": 1.668178368493247, "learning_rate": 9.999009821893648e-06, "loss": 0.7821, "step": 565 }, { "epoch": 0.04, "grad_norm": 1.7956391855296634, "learning_rate": 9.998989086440673e-06, "loss": 0.9013, "step": 566 }, { "epoch": 0.04, "grad_norm": 1.9590677223075175, "learning_rate": 9.998968136141655e-06, "loss": 0.8819, "step": 567 }, { "epoch": 0.04, "grad_norm": 1.2976093373859001, "learning_rate": 9.998946970997489e-06, "loss": 0.7791, "step": 568 }, { "epoch": 0.04, "grad_norm": 1.3863746479030314, "learning_rate": 9.998925591009086e-06, "loss": 0.6564, "step": 569 }, { "epoch": 0.04, "grad_norm": 1.9763140100823522, "learning_rate": 9.998903996177365e-06, "loss": 0.8222, "step": 570 }, { "epoch": 0.04, "grad_norm": 1.7881950427801796, "learning_rate": 9.998882186503256e-06, "loss": 0.9516, "step": 571 }, { "epoch": 0.04, "grad_norm": 1.912802031364536, "learning_rate": 9.998860161987693e-06, "loss": 0.8128, "step": 572 }, { "epoch": 0.04, "grad_norm": 1.450783375855501, "learning_rate": 9.998837922631625e-06, "loss": 0.7613, "step": 573 }, { "epoch": 0.04, "grad_norm": 1.7654842779803874, "learning_rate": 9.998815468436007e-06, "loss": 0.8626, "step": 574 }, { "epoch": 0.04, "grad_norm": 2.2057411052843974, "learning_rate": 9.998792799401804e-06, "loss": 0.8762, "step": 575 }, { "epoch": 0.04, "grad_norm": 1.8600432778861404, "learning_rate": 9.998769915529991e-06, "loss": 0.8236, "step": 576 }, { "epoch": 0.04, "grad_norm": 2.171645714115359, "learning_rate": 9.998746816821551e-06, "loss": 0.9493, "step": 577 }, { "epoch": 0.04, "grad_norm": 2.007506888714666, "learning_rate": 9.998723503277476e-06, "loss": 0.8661, "step": 578 }, { "epoch": 0.04, "grad_norm": 1.3163844488857332, "learning_rate": 9.99869997489877e-06, "loss": 0.6522, "step": 579 }, { "epoch": 0.04, "grad_norm": 1.7380297784724377, "learning_rate": 9.99867623168644e-06, "loss": 0.938, "step": 580 }, { "epoch": 0.04, "grad_norm": 1.729791122036369, "learning_rate": 9.99865227364151e-06, "loss": 0.8873, "step": 581 }, { "epoch": 0.04, "grad_norm": 2.0530833945348923, "learning_rate": 9.99862810076501e-06, "loss": 0.8179, "step": 582 }, { "epoch": 0.04, "grad_norm": 2.01790884750594, "learning_rate": 9.998603713057977e-06, "loss": 0.8618, "step": 583 }, { "epoch": 0.04, "grad_norm": 1.6568395057592218, "learning_rate": 9.99857911052146e-06, "loss": 0.8248, "step": 584 }, { "epoch": 0.04, "grad_norm": 1.3611971654941684, "learning_rate": 9.998554293156518e-06, "loss": 0.7706, "step": 585 }, { "epoch": 0.04, "grad_norm": 2.946764807455744, "learning_rate": 9.998529260964214e-06, "loss": 1.007, "step": 586 }, { "epoch": 0.04, "grad_norm": 1.7104243694535493, "learning_rate": 9.998504013945627e-06, "loss": 0.9095, "step": 587 }, { "epoch": 0.04, "grad_norm": 3.4117130868534162, "learning_rate": 9.99847855210184e-06, "loss": 0.7886, "step": 588 }, { "epoch": 0.04, "grad_norm": 2.0839893806244505, "learning_rate": 9.998452875433948e-06, "loss": 0.8285, "step": 589 }, { "epoch": 0.04, "grad_norm": 1.7674756128803313, "learning_rate": 9.998426983943055e-06, "loss": 0.7872, "step": 590 }, { "epoch": 0.04, "grad_norm": 1.7725670098652684, "learning_rate": 9.998400877630272e-06, "loss": 0.8291, "step": 591 }, { "epoch": 0.04, "grad_norm": 1.491808370324709, "learning_rate": 9.998374556496724e-06, "loss": 0.796, "step": 592 }, { "epoch": 0.04, "grad_norm": 1.595029043602263, "learning_rate": 9.99834802054354e-06, "loss": 0.8034, "step": 593 }, { "epoch": 0.04, "grad_norm": 1.6763151002939822, "learning_rate": 9.998321269771862e-06, "loss": 0.7767, "step": 594 }, { "epoch": 0.04, "grad_norm": 1.362857217832537, "learning_rate": 9.998294304182837e-06, "loss": 0.668, "step": 595 }, { "epoch": 0.04, "grad_norm": 2.063463444405761, "learning_rate": 9.998267123777628e-06, "loss": 0.8586, "step": 596 }, { "epoch": 0.04, "grad_norm": 1.96286728185719, "learning_rate": 9.998239728557399e-06, "loss": 0.8932, "step": 597 }, { "epoch": 0.04, "grad_norm": 1.8742921228513378, "learning_rate": 9.99821211852333e-06, "loss": 0.8958, "step": 598 }, { "epoch": 0.04, "grad_norm": 1.5835730114582487, "learning_rate": 9.998184293676606e-06, "loss": 0.7922, "step": 599 }, { "epoch": 0.04, "grad_norm": 1.821107947320514, "learning_rate": 9.998156254018423e-06, "loss": 0.8537, "step": 600 }, { "epoch": 0.04, "grad_norm": 1.9868690491181298, "learning_rate": 9.998127999549988e-06, "loss": 0.8998, "step": 601 }, { "epoch": 0.04, "grad_norm": 1.5267567516406562, "learning_rate": 9.998099530272514e-06, "loss": 0.7331, "step": 602 }, { "epoch": 0.04, "grad_norm": 1.8335743530370054, "learning_rate": 9.998070846187225e-06, "loss": 0.8573, "step": 603 }, { "epoch": 0.04, "grad_norm": 1.499701522631892, "learning_rate": 9.998041947295353e-06, "loss": 0.8317, "step": 604 }, { "epoch": 0.04, "grad_norm": 1.6048965745650317, "learning_rate": 9.99801283359814e-06, "loss": 0.8309, "step": 605 }, { "epoch": 0.04, "grad_norm": 1.267544494187693, "learning_rate": 9.99798350509684e-06, "loss": 0.6528, "step": 606 }, { "epoch": 0.04, "grad_norm": 1.1479952770943131, "learning_rate": 9.997953961792708e-06, "loss": 0.6612, "step": 607 }, { "epoch": 0.04, "grad_norm": 2.059706236608989, "learning_rate": 9.997924203687018e-06, "loss": 0.8369, "step": 608 }, { "epoch": 0.04, "grad_norm": 1.944068238545091, "learning_rate": 9.997894230781048e-06, "loss": 0.9056, "step": 609 }, { "epoch": 0.04, "grad_norm": 1.9158158828659637, "learning_rate": 9.997864043076087e-06, "loss": 0.8043, "step": 610 }, { "epoch": 0.04, "grad_norm": 1.465178800440878, "learning_rate": 9.99783364057343e-06, "loss": 0.7744, "step": 611 }, { "epoch": 0.04, "grad_norm": 2.227960174307878, "learning_rate": 9.997803023274384e-06, "loss": 0.7229, "step": 612 }, { "epoch": 0.04, "grad_norm": 2.635788681947252, "learning_rate": 9.997772191180269e-06, "loss": 0.8791, "step": 613 }, { "epoch": 0.04, "grad_norm": 1.928799437447679, "learning_rate": 9.997741144292406e-06, "loss": 0.693, "step": 614 }, { "epoch": 0.04, "grad_norm": 1.5803294386789921, "learning_rate": 9.997709882612128e-06, "loss": 0.7813, "step": 615 }, { "epoch": 0.04, "grad_norm": 1.1685488250992964, "learning_rate": 9.997678406140783e-06, "loss": 0.6307, "step": 616 }, { "epoch": 0.04, "grad_norm": 1.7243431632497146, "learning_rate": 9.99764671487972e-06, "loss": 0.8714, "step": 617 }, { "epoch": 0.04, "grad_norm": 1.9001831478275524, "learning_rate": 9.997614808830305e-06, "loss": 0.8422, "step": 618 }, { "epoch": 0.04, "grad_norm": 2.217985760483623, "learning_rate": 9.997582687993905e-06, "loss": 0.8954, "step": 619 }, { "epoch": 0.04, "grad_norm": 2.782719100550166, "learning_rate": 9.997550352371903e-06, "loss": 0.8261, "step": 620 }, { "epoch": 0.04, "grad_norm": 1.688656351303894, "learning_rate": 9.99751780196569e-06, "loss": 0.8024, "step": 621 }, { "epoch": 0.04, "grad_norm": 2.1578930034513744, "learning_rate": 9.997485036776662e-06, "loss": 0.8304, "step": 622 }, { "epoch": 0.04, "grad_norm": 1.5825243599030856, "learning_rate": 9.997452056806226e-06, "loss": 0.7893, "step": 623 }, { "epoch": 0.04, "grad_norm": 1.4606823896056202, "learning_rate": 9.997418862055804e-06, "loss": 0.7869, "step": 624 }, { "epoch": 0.04, "grad_norm": 2.211500674047886, "learning_rate": 9.99738545252682e-06, "loss": 0.7525, "step": 625 }, { "epoch": 0.04, "grad_norm": 1.2481889635978611, "learning_rate": 9.997351828220711e-06, "loss": 0.7284, "step": 626 }, { "epoch": 0.04, "grad_norm": 1.8351619521169615, "learning_rate": 9.99731798913892e-06, "loss": 0.7314, "step": 627 }, { "epoch": 0.04, "grad_norm": 1.275693359426726, "learning_rate": 9.997283935282903e-06, "loss": 0.6776, "step": 628 }, { "epoch": 0.04, "grad_norm": 1.8951811025463934, "learning_rate": 9.997249666654122e-06, "loss": 0.7806, "step": 629 }, { "epoch": 0.04, "grad_norm": 1.5864137720134441, "learning_rate": 9.997215183254053e-06, "loss": 0.7623, "step": 630 }, { "epoch": 0.04, "grad_norm": 1.4685733178289022, "learning_rate": 9.997180485084175e-06, "loss": 0.6613, "step": 631 }, { "epoch": 0.04, "grad_norm": 1.2373780434993908, "learning_rate": 9.997145572145981e-06, "loss": 0.7208, "step": 632 }, { "epoch": 0.04, "grad_norm": 1.742705355774268, "learning_rate": 9.99711044444097e-06, "loss": 0.7726, "step": 633 }, { "epoch": 0.04, "grad_norm": 1.9096975415590016, "learning_rate": 9.997075101970652e-06, "loss": 0.8608, "step": 634 }, { "epoch": 0.04, "grad_norm": 1.7181347606191635, "learning_rate": 9.997039544736547e-06, "loss": 0.8152, "step": 635 }, { "epoch": 0.04, "grad_norm": 1.6208647320405167, "learning_rate": 9.997003772740183e-06, "loss": 0.751, "step": 636 }, { "epoch": 0.04, "grad_norm": 2.107117075287744, "learning_rate": 9.996967785983097e-06, "loss": 0.8331, "step": 637 }, { "epoch": 0.04, "grad_norm": 1.4584528720883259, "learning_rate": 9.996931584466836e-06, "loss": 0.7486, "step": 638 }, { "epoch": 0.04, "grad_norm": 1.5866675798598233, "learning_rate": 9.996895168192954e-06, "loss": 0.8194, "step": 639 }, { "epoch": 0.04, "grad_norm": 2.4023697087509377, "learning_rate": 9.996858537163019e-06, "loss": 0.8962, "step": 640 }, { "epoch": 0.04, "grad_norm": 1.5646488728997838, "learning_rate": 9.996821691378603e-06, "loss": 0.7125, "step": 641 }, { "epoch": 0.04, "grad_norm": 1.7881536568108198, "learning_rate": 9.996784630841293e-06, "loss": 0.7989, "step": 642 }, { "epoch": 0.04, "grad_norm": 1.8177198108367563, "learning_rate": 9.996747355552675e-06, "loss": 0.8114, "step": 643 }, { "epoch": 0.04, "grad_norm": 2.134156018856502, "learning_rate": 9.996709865514357e-06, "loss": 0.8547, "step": 644 }, { "epoch": 0.04, "grad_norm": 2.567784649551899, "learning_rate": 9.99667216072795e-06, "loss": 0.8503, "step": 645 }, { "epoch": 0.04, "grad_norm": 1.5900744624604537, "learning_rate": 9.996634241195071e-06, "loss": 0.724, "step": 646 }, { "epoch": 0.04, "grad_norm": 2.233732491081797, "learning_rate": 9.996596106917353e-06, "loss": 0.8892, "step": 647 }, { "epoch": 0.04, "grad_norm": 1.2825888366059248, "learning_rate": 9.996557757896432e-06, "loss": 0.6685, "step": 648 }, { "epoch": 0.04, "grad_norm": 1.8645765194210882, "learning_rate": 9.99651919413396e-06, "loss": 0.8237, "step": 649 }, { "epoch": 0.04, "grad_norm": 1.74532837430669, "learning_rate": 9.996480415631592e-06, "loss": 0.8688, "step": 650 }, { "epoch": 0.04, "grad_norm": 1.8390187949980599, "learning_rate": 9.996441422390994e-06, "loss": 0.8376, "step": 651 }, { "epoch": 0.04, "grad_norm": 1.1025967532485195, "learning_rate": 9.996402214413841e-06, "loss": 0.6437, "step": 652 }, { "epoch": 0.04, "grad_norm": 1.5311281427057681, "learning_rate": 9.996362791701822e-06, "loss": 0.8102, "step": 653 }, { "epoch": 0.04, "grad_norm": 1.9562853529732152, "learning_rate": 9.996323154256628e-06, "loss": 0.8657, "step": 654 }, { "epoch": 0.04, "grad_norm": 1.7632699524735693, "learning_rate": 9.996283302079965e-06, "loss": 0.8632, "step": 655 }, { "epoch": 0.04, "grad_norm": 1.1308453719463478, "learning_rate": 9.996243235173541e-06, "loss": 0.72, "step": 656 }, { "epoch": 0.04, "grad_norm": 1.7903818366043438, "learning_rate": 9.996202953539085e-06, "loss": 0.8645, "step": 657 }, { "epoch": 0.04, "grad_norm": 1.791997179923725, "learning_rate": 9.996162457178322e-06, "loss": 0.8857, "step": 658 }, { "epoch": 0.04, "grad_norm": 2.0014836548381485, "learning_rate": 9.996121746092996e-06, "loss": 0.7748, "step": 659 }, { "epoch": 0.04, "grad_norm": 1.6478720593690013, "learning_rate": 9.996080820284857e-06, "loss": 0.7149, "step": 660 }, { "epoch": 0.04, "grad_norm": 1.710658938798865, "learning_rate": 9.99603967975566e-06, "loss": 0.8727, "step": 661 }, { "epoch": 0.04, "grad_norm": 1.6743769943465618, "learning_rate": 9.995998324507177e-06, "loss": 0.7679, "step": 662 }, { "epoch": 0.04, "grad_norm": 1.571385450888377, "learning_rate": 9.995956754541185e-06, "loss": 0.8029, "step": 663 }, { "epoch": 0.04, "grad_norm": 1.661708083071402, "learning_rate": 9.995914969859469e-06, "loss": 0.7454, "step": 664 }, { "epoch": 0.04, "grad_norm": 1.64905158154974, "learning_rate": 9.995872970463824e-06, "loss": 0.8057, "step": 665 }, { "epoch": 0.04, "grad_norm": 1.6893774997398785, "learning_rate": 9.995830756356058e-06, "loss": 0.8362, "step": 666 }, { "epoch": 0.04, "grad_norm": 1.7816404888843225, "learning_rate": 9.995788327537983e-06, "loss": 0.8523, "step": 667 }, { "epoch": 0.04, "grad_norm": 1.6392409889659902, "learning_rate": 9.995745684011424e-06, "loss": 0.7851, "step": 668 }, { "epoch": 0.04, "grad_norm": 1.6405380697833807, "learning_rate": 9.995702825778213e-06, "loss": 0.7779, "step": 669 }, { "epoch": 0.04, "grad_norm": 1.5561405230501975, "learning_rate": 9.99565975284019e-06, "loss": 0.8215, "step": 670 }, { "epoch": 0.04, "grad_norm": 1.772560412199757, "learning_rate": 9.995616465199209e-06, "loss": 0.8171, "step": 671 }, { "epoch": 0.04, "grad_norm": 1.9219945869874224, "learning_rate": 9.995572962857132e-06, "loss": 0.8102, "step": 672 }, { "epoch": 0.04, "grad_norm": 1.937902759646655, "learning_rate": 9.995529245815824e-06, "loss": 0.994, "step": 673 }, { "epoch": 0.04, "grad_norm": 1.6340439690310344, "learning_rate": 9.995485314077167e-06, "loss": 0.8541, "step": 674 }, { "epoch": 0.04, "grad_norm": 1.2675786399494626, "learning_rate": 9.995441167643048e-06, "loss": 0.7403, "step": 675 }, { "epoch": 0.04, "grad_norm": 1.7565483289637713, "learning_rate": 9.995396806515363e-06, "loss": 0.8921, "step": 676 }, { "epoch": 0.04, "grad_norm": 1.6798315406959603, "learning_rate": 9.995352230696021e-06, "loss": 0.848, "step": 677 }, { "epoch": 0.04, "grad_norm": 1.6773803523558062, "learning_rate": 9.995307440186937e-06, "loss": 0.8754, "step": 678 }, { "epoch": 0.04, "grad_norm": 1.6500516491749504, "learning_rate": 9.995262434990036e-06, "loss": 0.7963, "step": 679 }, { "epoch": 0.04, "grad_norm": 1.543198899300495, "learning_rate": 9.995217215107251e-06, "loss": 0.7815, "step": 680 }, { "epoch": 0.04, "grad_norm": 1.572859417449721, "learning_rate": 9.995171780540528e-06, "loss": 0.7871, "step": 681 }, { "epoch": 0.04, "grad_norm": 1.987632334964032, "learning_rate": 9.995126131291818e-06, "loss": 0.8211, "step": 682 }, { "epoch": 0.04, "grad_norm": 1.6440233990482203, "learning_rate": 9.995080267363082e-06, "loss": 0.9076, "step": 683 }, { "epoch": 0.04, "grad_norm": 1.8342015063724686, "learning_rate": 9.995034188756294e-06, "loss": 0.9248, "step": 684 }, { "epoch": 0.04, "grad_norm": 12.625151864714672, "learning_rate": 9.994987895473431e-06, "loss": 0.7224, "step": 685 }, { "epoch": 0.04, "grad_norm": 1.8039346567735581, "learning_rate": 9.994941387516484e-06, "loss": 0.8977, "step": 686 }, { "epoch": 0.04, "grad_norm": 1.302321704996743, "learning_rate": 9.994894664887453e-06, "loss": 0.7542, "step": 687 }, { "epoch": 0.04, "grad_norm": 1.7505555018610472, "learning_rate": 9.994847727588344e-06, "loss": 0.8668, "step": 688 }, { "epoch": 0.04, "grad_norm": 2.129885566514705, "learning_rate": 9.994800575621176e-06, "loss": 0.8146, "step": 689 }, { "epoch": 0.04, "grad_norm": 1.8213465783176008, "learning_rate": 9.994753208987974e-06, "loss": 0.8732, "step": 690 }, { "epoch": 0.04, "grad_norm": 3.4361898535043993, "learning_rate": 9.994705627690777e-06, "loss": 0.851, "step": 691 }, { "epoch": 0.04, "grad_norm": 1.928926256354512, "learning_rate": 9.994657831731624e-06, "loss": 0.8001, "step": 692 }, { "epoch": 0.04, "grad_norm": 1.8071044778606058, "learning_rate": 9.994609821112576e-06, "loss": 0.8681, "step": 693 }, { "epoch": 0.04, "grad_norm": 2.470620482603179, "learning_rate": 9.99456159583569e-06, "loss": 0.8564, "step": 694 }, { "epoch": 0.04, "grad_norm": 1.9248529209151586, "learning_rate": 9.994513155903042e-06, "loss": 0.8451, "step": 695 }, { "epoch": 0.04, "grad_norm": 1.1087982159352379, "learning_rate": 9.994464501316715e-06, "loss": 0.6852, "step": 696 }, { "epoch": 0.04, "grad_norm": 2.2680593627928234, "learning_rate": 9.994415632078797e-06, "loss": 0.6731, "step": 697 }, { "epoch": 0.04, "grad_norm": 1.204997707577994, "learning_rate": 9.994366548191393e-06, "loss": 0.7028, "step": 698 }, { "epoch": 0.04, "grad_norm": 1.3157918485937417, "learning_rate": 9.994317249656607e-06, "loss": 0.7342, "step": 699 }, { "epoch": 0.04, "grad_norm": 1.991981074727401, "learning_rate": 9.99426773647656e-06, "loss": 0.8255, "step": 700 }, { "epoch": 0.04, "grad_norm": 1.6196357965955943, "learning_rate": 9.994218008653381e-06, "loss": 0.8692, "step": 701 }, { "epoch": 0.04, "grad_norm": 1.6439459777819554, "learning_rate": 9.994168066189205e-06, "loss": 0.807, "step": 702 }, { "epoch": 0.04, "grad_norm": 1.6841125984389695, "learning_rate": 9.994117909086179e-06, "loss": 0.7391, "step": 703 }, { "epoch": 0.05, "grad_norm": 1.7460969637367065, "learning_rate": 9.994067537346461e-06, "loss": 0.8694, "step": 704 }, { "epoch": 0.05, "grad_norm": 1.058480063160555, "learning_rate": 9.994016950972214e-06, "loss": 0.7322, "step": 705 }, { "epoch": 0.05, "grad_norm": 1.6657507137050596, "learning_rate": 9.993966149965613e-06, "loss": 0.8005, "step": 706 }, { "epoch": 0.05, "grad_norm": 1.9390855718628324, "learning_rate": 9.99391513432884e-06, "loss": 0.7821, "step": 707 }, { "epoch": 0.05, "grad_norm": 2.0092839023076663, "learning_rate": 9.993863904064087e-06, "loss": 0.8751, "step": 708 }, { "epoch": 0.05, "grad_norm": 1.7774311823970939, "learning_rate": 9.993812459173557e-06, "loss": 0.8009, "step": 709 }, { "epoch": 0.05, "grad_norm": 1.8490536883678763, "learning_rate": 9.993760799659463e-06, "loss": 0.7692, "step": 710 }, { "epoch": 0.05, "grad_norm": 1.8546158703505093, "learning_rate": 9.993708925524022e-06, "loss": 0.8852, "step": 711 }, { "epoch": 0.05, "grad_norm": 1.5094776157497096, "learning_rate": 9.993656836769464e-06, "loss": 0.7433, "step": 712 }, { "epoch": 0.05, "grad_norm": 1.6837792990247413, "learning_rate": 9.993604533398029e-06, "loss": 0.7747, "step": 713 }, { "epoch": 0.05, "grad_norm": 1.8219970915086379, "learning_rate": 9.993552015411965e-06, "loss": 0.8096, "step": 714 }, { "epoch": 0.05, "grad_norm": 1.1983058929276202, "learning_rate": 9.993499282813528e-06, "loss": 0.6548, "step": 715 }, { "epoch": 0.05, "grad_norm": 1.8179192952713932, "learning_rate": 9.993446335604983e-06, "loss": 0.8648, "step": 716 }, { "epoch": 0.05, "grad_norm": 2.0560147375823075, "learning_rate": 9.993393173788608e-06, "loss": 0.8466, "step": 717 }, { "epoch": 0.05, "grad_norm": 1.9881989537340898, "learning_rate": 9.993339797366687e-06, "loss": 0.8938, "step": 718 }, { "epoch": 0.05, "grad_norm": 1.3599990827659052, "learning_rate": 9.993286206341515e-06, "loss": 0.6853, "step": 719 }, { "epoch": 0.05, "grad_norm": 2.060872255285467, "learning_rate": 9.993232400715394e-06, "loss": 0.7351, "step": 720 }, { "epoch": 0.05, "grad_norm": 1.720645143773739, "learning_rate": 9.993178380490636e-06, "loss": 0.8005, "step": 721 }, { "epoch": 0.05, "grad_norm": 1.8347225951112043, "learning_rate": 9.993124145669563e-06, "loss": 0.9001, "step": 722 }, { "epoch": 0.05, "grad_norm": 1.7932183054459387, "learning_rate": 9.993069696254506e-06, "loss": 0.8598, "step": 723 }, { "epoch": 0.05, "grad_norm": 1.8170577105066659, "learning_rate": 9.993015032247806e-06, "loss": 0.8484, "step": 724 }, { "epoch": 0.05, "grad_norm": 1.5478923693448507, "learning_rate": 9.992960153651812e-06, "loss": 0.7769, "step": 725 }, { "epoch": 0.05, "grad_norm": 1.3542824831702565, "learning_rate": 9.992905060468882e-06, "loss": 0.7234, "step": 726 }, { "epoch": 0.05, "grad_norm": 2.2039610093150714, "learning_rate": 9.992849752701384e-06, "loss": 0.8426, "step": 727 }, { "epoch": 0.05, "grad_norm": 1.8473963840689998, "learning_rate": 9.992794230351695e-06, "loss": 0.8341, "step": 728 }, { "epoch": 0.05, "grad_norm": 1.1294644560666383, "learning_rate": 9.9927384934222e-06, "loss": 0.5957, "step": 729 }, { "epoch": 0.05, "grad_norm": 1.9942645668295114, "learning_rate": 9.992682541915297e-06, "loss": 0.8535, "step": 730 }, { "epoch": 0.05, "grad_norm": 1.7271986565547481, "learning_rate": 9.99262637583339e-06, "loss": 0.808, "step": 731 }, { "epoch": 0.05, "grad_norm": 1.4449028043884011, "learning_rate": 9.992569995178891e-06, "loss": 0.785, "step": 732 }, { "epoch": 0.05, "grad_norm": 1.560578037381746, "learning_rate": 9.992513399954225e-06, "loss": 0.7653, "step": 733 }, { "epoch": 0.05, "grad_norm": 1.9566526056393356, "learning_rate": 9.992456590161825e-06, "loss": 0.838, "step": 734 }, { "epoch": 0.05, "grad_norm": 1.9196674686503457, "learning_rate": 9.99239956580413e-06, "loss": 0.883, "step": 735 }, { "epoch": 0.05, "grad_norm": 1.6751613914611911, "learning_rate": 9.992342326883591e-06, "loss": 0.7851, "step": 736 }, { "epoch": 0.05, "grad_norm": 1.7528533764865257, "learning_rate": 9.99228487340267e-06, "loss": 0.8531, "step": 737 }, { "epoch": 0.05, "grad_norm": 1.2459532174488632, "learning_rate": 9.992227205363837e-06, "loss": 0.6226, "step": 738 }, { "epoch": 0.05, "grad_norm": 2.4736832356551517, "learning_rate": 9.992169322769568e-06, "loss": 0.7757, "step": 739 }, { "epoch": 0.05, "grad_norm": 1.700919560122714, "learning_rate": 9.99211122562235e-06, "loss": 0.7572, "step": 740 }, { "epoch": 0.05, "grad_norm": 1.7674646640009553, "learning_rate": 9.992052913924683e-06, "loss": 0.701, "step": 741 }, { "epoch": 0.05, "grad_norm": 1.6881293473208825, "learning_rate": 9.99199438767907e-06, "loss": 0.7486, "step": 742 }, { "epoch": 0.05, "grad_norm": 1.664763578088777, "learning_rate": 9.991935646888031e-06, "loss": 0.9625, "step": 743 }, { "epoch": 0.05, "grad_norm": 1.7527302266978293, "learning_rate": 9.991876691554086e-06, "loss": 0.8172, "step": 744 }, { "epoch": 0.05, "grad_norm": 1.6736237533348404, "learning_rate": 9.991817521679769e-06, "loss": 0.8082, "step": 745 }, { "epoch": 0.05, "grad_norm": 1.684803838163961, "learning_rate": 9.991758137267625e-06, "loss": 0.7729, "step": 746 }, { "epoch": 0.05, "grad_norm": 1.2407430547832956, "learning_rate": 9.991698538320205e-06, "loss": 0.7769, "step": 747 }, { "epoch": 0.05, "grad_norm": 1.245955097829141, "learning_rate": 9.99163872484007e-06, "loss": 0.7049, "step": 748 }, { "epoch": 0.05, "grad_norm": 1.6557691366503249, "learning_rate": 9.991578696829793e-06, "loss": 0.8497, "step": 749 }, { "epoch": 0.05, "grad_norm": 1.914907640188716, "learning_rate": 9.99151845429195e-06, "loss": 0.826, "step": 750 }, { "epoch": 0.05, "grad_norm": 1.245015951321012, "learning_rate": 9.991457997229136e-06, "loss": 0.692, "step": 751 }, { "epoch": 0.05, "grad_norm": 1.883935517865208, "learning_rate": 9.991397325643943e-06, "loss": 0.8809, "step": 752 }, { "epoch": 0.05, "grad_norm": 1.9365547083922738, "learning_rate": 9.991336439538983e-06, "loss": 0.8927, "step": 753 }, { "epoch": 0.05, "grad_norm": 1.8388996099018942, "learning_rate": 9.99127533891687e-06, "loss": 0.7956, "step": 754 }, { "epoch": 0.05, "grad_norm": 1.7663440148609437, "learning_rate": 9.991214023780232e-06, "loss": 0.9004, "step": 755 }, { "epoch": 0.05, "grad_norm": 1.6112726490696208, "learning_rate": 9.991152494131702e-06, "loss": 0.8724, "step": 756 }, { "epoch": 0.05, "grad_norm": 1.6763809662661073, "learning_rate": 9.991090749973926e-06, "loss": 0.7497, "step": 757 }, { "epoch": 0.05, "grad_norm": 1.776134662357326, "learning_rate": 9.991028791309558e-06, "loss": 0.8401, "step": 758 }, { "epoch": 0.05, "grad_norm": 2.2119255581037045, "learning_rate": 9.99096661814126e-06, "loss": 0.8355, "step": 759 }, { "epoch": 0.05, "grad_norm": 1.8005520953098555, "learning_rate": 9.990904230471704e-06, "loss": 0.8743, "step": 760 }, { "epoch": 0.05, "grad_norm": 1.1905209031393242, "learning_rate": 9.990841628303571e-06, "loss": 0.7605, "step": 761 }, { "epoch": 0.05, "grad_norm": 1.6419387245713832, "learning_rate": 9.990778811639553e-06, "loss": 0.7793, "step": 762 }, { "epoch": 0.05, "grad_norm": 1.3780268850340307, "learning_rate": 9.990715780482348e-06, "loss": 0.6038, "step": 763 }, { "epoch": 0.05, "grad_norm": 1.6996166037358689, "learning_rate": 9.990652534834666e-06, "loss": 0.8066, "step": 764 }, { "epoch": 0.05, "grad_norm": 1.7287422108949502, "learning_rate": 9.990589074699225e-06, "loss": 0.8101, "step": 765 }, { "epoch": 0.05, "grad_norm": 1.694341227678109, "learning_rate": 9.990525400078752e-06, "loss": 0.704, "step": 766 }, { "epoch": 0.05, "grad_norm": 1.4081590452628883, "learning_rate": 9.990461510975983e-06, "loss": 0.7222, "step": 767 }, { "epoch": 0.05, "grad_norm": 1.966430814944471, "learning_rate": 9.990397407393668e-06, "loss": 0.7957, "step": 768 }, { "epoch": 0.05, "grad_norm": 1.704963065328622, "learning_rate": 9.990333089334556e-06, "loss": 0.8095, "step": 769 }, { "epoch": 0.05, "grad_norm": 2.204864773577963, "learning_rate": 9.990268556801413e-06, "loss": 0.8358, "step": 770 }, { "epoch": 0.05, "grad_norm": 1.5643666785208763, "learning_rate": 9.990203809797014e-06, "loss": 0.7768, "step": 771 }, { "epoch": 0.05, "grad_norm": 2.1966762068155536, "learning_rate": 9.990138848324142e-06, "loss": 0.7834, "step": 772 }, { "epoch": 0.05, "grad_norm": 1.5459335298084063, "learning_rate": 9.990073672385588e-06, "loss": 0.6571, "step": 773 }, { "epoch": 0.05, "grad_norm": 1.4800379891198172, "learning_rate": 9.990008281984154e-06, "loss": 0.7233, "step": 774 }, { "epoch": 0.05, "grad_norm": 1.6420736154314959, "learning_rate": 9.989942677122648e-06, "loss": 0.8111, "step": 775 }, { "epoch": 0.05, "grad_norm": 2.225740345276188, "learning_rate": 9.989876857803891e-06, "loss": 0.9648, "step": 776 }, { "epoch": 0.05, "grad_norm": 1.7768505989654872, "learning_rate": 9.989810824030712e-06, "loss": 0.7763, "step": 777 }, { "epoch": 0.05, "grad_norm": 1.849172489749472, "learning_rate": 9.989744575805951e-06, "loss": 0.8143, "step": 778 }, { "epoch": 0.05, "grad_norm": 1.76622357868332, "learning_rate": 9.989678113132451e-06, "loss": 0.794, "step": 779 }, { "epoch": 0.05, "grad_norm": 1.8709651981031157, "learning_rate": 9.98961143601307e-06, "loss": 0.8018, "step": 780 }, { "epoch": 0.05, "grad_norm": 1.8054634349909078, "learning_rate": 9.989544544450675e-06, "loss": 0.7519, "step": 781 }, { "epoch": 0.05, "grad_norm": 1.7903279476984124, "learning_rate": 9.989477438448138e-06, "loss": 0.8927, "step": 782 }, { "epoch": 0.05, "grad_norm": 1.372774379948072, "learning_rate": 9.989410118008348e-06, "loss": 0.7031, "step": 783 }, { "epoch": 0.05, "grad_norm": 1.3066365477134183, "learning_rate": 9.989342583134194e-06, "loss": 0.6278, "step": 784 }, { "epoch": 0.05, "grad_norm": 1.8067106311120367, "learning_rate": 9.98927483382858e-06, "loss": 0.8217, "step": 785 }, { "epoch": 0.05, "grad_norm": 1.7767938175256455, "learning_rate": 9.989206870094416e-06, "loss": 0.7947, "step": 786 }, { "epoch": 0.05, "grad_norm": 1.5533795482092454, "learning_rate": 9.989138691934628e-06, "loss": 0.803, "step": 787 }, { "epoch": 0.05, "grad_norm": 1.1303490778833296, "learning_rate": 9.98907029935214e-06, "loss": 0.6725, "step": 788 }, { "epoch": 0.05, "grad_norm": 1.6380698288485795, "learning_rate": 9.989001692349894e-06, "loss": 0.849, "step": 789 }, { "epoch": 0.05, "grad_norm": 1.952629398486116, "learning_rate": 9.98893287093084e-06, "loss": 0.8312, "step": 790 }, { "epoch": 0.05, "grad_norm": 2.062124051790573, "learning_rate": 9.988863835097934e-06, "loss": 0.8083, "step": 791 }, { "epoch": 0.05, "grad_norm": 1.2634163698289973, "learning_rate": 9.988794584854143e-06, "loss": 0.6779, "step": 792 }, { "epoch": 0.05, "grad_norm": 1.7568198087675864, "learning_rate": 9.988725120202442e-06, "loss": 0.9481, "step": 793 }, { "epoch": 0.05, "grad_norm": 1.819064402877648, "learning_rate": 9.98865544114582e-06, "loss": 0.763, "step": 794 }, { "epoch": 0.05, "grad_norm": 1.658225068847825, "learning_rate": 9.98858554768727e-06, "loss": 0.7701, "step": 795 }, { "epoch": 0.05, "grad_norm": 1.7668520353431962, "learning_rate": 9.988515439829795e-06, "loss": 0.8267, "step": 796 }, { "epoch": 0.05, "grad_norm": 1.5889777056350607, "learning_rate": 9.988445117576408e-06, "loss": 0.8522, "step": 797 }, { "epoch": 0.05, "grad_norm": 1.654733239796124, "learning_rate": 9.988374580930133e-06, "loss": 0.8093, "step": 798 }, { "epoch": 0.05, "grad_norm": 1.8170302756357204, "learning_rate": 9.988303829894002e-06, "loss": 0.8541, "step": 799 }, { "epoch": 0.05, "grad_norm": 1.7729488310634756, "learning_rate": 9.988232864471053e-06, "loss": 0.8251, "step": 800 }, { "epoch": 0.05, "grad_norm": 1.2376648466830145, "learning_rate": 9.988161684664336e-06, "loss": 0.7173, "step": 801 }, { "epoch": 0.05, "grad_norm": 2.0570328507266042, "learning_rate": 9.988090290476911e-06, "loss": 0.7779, "step": 802 }, { "epoch": 0.05, "grad_norm": 1.8986793293723123, "learning_rate": 9.988018681911849e-06, "loss": 0.8276, "step": 803 }, { "epoch": 0.05, "grad_norm": 1.874989411021354, "learning_rate": 9.987946858972224e-06, "loss": 0.8056, "step": 804 }, { "epoch": 0.05, "grad_norm": 1.6888977206436855, "learning_rate": 9.987874821661124e-06, "loss": 0.8485, "step": 805 }, { "epoch": 0.05, "grad_norm": 1.8366493802723163, "learning_rate": 9.987802569981647e-06, "loss": 0.8189, "step": 806 }, { "epoch": 0.05, "grad_norm": 1.560513084757402, "learning_rate": 9.987730103936895e-06, "loss": 0.7605, "step": 807 }, { "epoch": 0.05, "grad_norm": 1.1340716683844125, "learning_rate": 9.987657423529982e-06, "loss": 0.6281, "step": 808 }, { "epoch": 0.05, "grad_norm": 1.7510007831783003, "learning_rate": 9.987584528764036e-06, "loss": 0.8492, "step": 809 }, { "epoch": 0.05, "grad_norm": 1.5522324000207517, "learning_rate": 9.987511419642186e-06, "loss": 0.8888, "step": 810 }, { "epoch": 0.05, "grad_norm": 1.9455568048112146, "learning_rate": 9.987438096167577e-06, "loss": 0.9698, "step": 811 }, { "epoch": 0.05, "grad_norm": 1.6399027053730926, "learning_rate": 9.987364558343357e-06, "loss": 0.8337, "step": 812 }, { "epoch": 0.05, "grad_norm": 1.6678866111959756, "learning_rate": 9.987290806172689e-06, "loss": 0.8211, "step": 813 }, { "epoch": 0.05, "grad_norm": 1.4528357809761134, "learning_rate": 9.98721683965874e-06, "loss": 0.66, "step": 814 }, { "epoch": 0.05, "grad_norm": 1.1883757979784033, "learning_rate": 9.987142658804692e-06, "loss": 0.6931, "step": 815 }, { "epoch": 0.05, "grad_norm": 1.855555683387513, "learning_rate": 9.987068263613733e-06, "loss": 0.7755, "step": 816 }, { "epoch": 0.05, "grad_norm": 1.4826379571696928, "learning_rate": 9.986993654089059e-06, "loss": 0.7507, "step": 817 }, { "epoch": 0.05, "grad_norm": 1.8648487927107544, "learning_rate": 9.986918830233877e-06, "loss": 0.7954, "step": 818 }, { "epoch": 0.05, "grad_norm": 1.7616043702994986, "learning_rate": 9.986843792051402e-06, "loss": 0.8563, "step": 819 }, { "epoch": 0.05, "grad_norm": 2.4628736568345904, "learning_rate": 9.98676853954486e-06, "loss": 0.8255, "step": 820 }, { "epoch": 0.05, "grad_norm": 1.8324654618570015, "learning_rate": 9.986693072717483e-06, "loss": 0.8657, "step": 821 }, { "epoch": 0.05, "grad_norm": 1.578099396568665, "learning_rate": 9.98661739157252e-06, "loss": 0.7823, "step": 822 }, { "epoch": 0.05, "grad_norm": 1.221076633493838, "learning_rate": 9.986541496113215e-06, "loss": 0.662, "step": 823 }, { "epoch": 0.05, "grad_norm": 1.4558469800638394, "learning_rate": 9.986465386342838e-06, "loss": 0.8151, "step": 824 }, { "epoch": 0.05, "grad_norm": 1.8527167154328192, "learning_rate": 9.986389062264656e-06, "loss": 0.8437, "step": 825 }, { "epoch": 0.05, "grad_norm": 1.8003292248476161, "learning_rate": 9.986312523881952e-06, "loss": 0.8334, "step": 826 }, { "epoch": 0.05, "grad_norm": 1.7005240707831848, "learning_rate": 9.98623577119801e-06, "loss": 0.8463, "step": 827 }, { "epoch": 0.05, "grad_norm": 1.6215560407818999, "learning_rate": 9.986158804216133e-06, "loss": 0.8278, "step": 828 }, { "epoch": 0.05, "grad_norm": 1.7900263410954174, "learning_rate": 9.98608162293963e-06, "loss": 0.7979, "step": 829 }, { "epoch": 0.05, "grad_norm": 2.916971989585298, "learning_rate": 9.986004227371814e-06, "loss": 0.7777, "step": 830 }, { "epoch": 0.05, "grad_norm": 1.8423134072669005, "learning_rate": 9.985926617516012e-06, "loss": 0.7351, "step": 831 }, { "epoch": 0.05, "grad_norm": 1.56883812790139, "learning_rate": 9.985848793375563e-06, "loss": 0.7645, "step": 832 }, { "epoch": 0.05, "grad_norm": 1.1762151235177551, "learning_rate": 9.98577075495381e-06, "loss": 0.5859, "step": 833 }, { "epoch": 0.05, "grad_norm": 1.5698722461370058, "learning_rate": 9.985692502254105e-06, "loss": 0.8228, "step": 834 }, { "epoch": 0.05, "grad_norm": 1.8536037186234628, "learning_rate": 9.985614035279813e-06, "loss": 0.7907, "step": 835 }, { "epoch": 0.05, "grad_norm": 1.6926802753369934, "learning_rate": 9.985535354034309e-06, "loss": 0.8115, "step": 836 }, { "epoch": 0.05, "grad_norm": 1.6514260982309326, "learning_rate": 9.985456458520968e-06, "loss": 0.8088, "step": 837 }, { "epoch": 0.05, "grad_norm": 1.6217780107204238, "learning_rate": 9.985377348743183e-06, "loss": 0.8737, "step": 838 }, { "epoch": 0.05, "grad_norm": 1.6419443566173086, "learning_rate": 9.98529802470436e-06, "loss": 0.7856, "step": 839 }, { "epoch": 0.05, "grad_norm": 1.7457584196617566, "learning_rate": 9.985218486407899e-06, "loss": 0.7398, "step": 840 }, { "epoch": 0.05, "grad_norm": 1.8268601115444503, "learning_rate": 9.985138733857225e-06, "loss": 0.8514, "step": 841 }, { "epoch": 0.05, "grad_norm": 1.9780923462654605, "learning_rate": 9.985058767055765e-06, "loss": 0.8388, "step": 842 }, { "epoch": 0.05, "grad_norm": 2.2508525571258096, "learning_rate": 9.984978586006951e-06, "loss": 0.7703, "step": 843 }, { "epoch": 0.05, "grad_norm": 1.7015856490875, "learning_rate": 9.984898190714235e-06, "loss": 0.7384, "step": 844 }, { "epoch": 0.05, "grad_norm": 1.5846031821312934, "learning_rate": 9.984817581181068e-06, "loss": 0.8528, "step": 845 }, { "epoch": 0.05, "grad_norm": 1.599064294242115, "learning_rate": 9.984736757410915e-06, "loss": 0.8833, "step": 846 }, { "epoch": 0.05, "grad_norm": 2.0962252125374476, "learning_rate": 9.984655719407252e-06, "loss": 0.8445, "step": 847 }, { "epoch": 0.05, "grad_norm": 1.5861757561983147, "learning_rate": 9.98457446717356e-06, "loss": 0.7829, "step": 848 }, { "epoch": 0.05, "grad_norm": 1.7631846659369335, "learning_rate": 9.98449300071333e-06, "loss": 0.8175, "step": 849 }, { "epoch": 0.05, "grad_norm": 1.7637492502741967, "learning_rate": 9.984411320030068e-06, "loss": 0.8524, "step": 850 }, { "epoch": 0.05, "grad_norm": 1.555495305101137, "learning_rate": 9.98432942512728e-06, "loss": 0.7203, "step": 851 }, { "epoch": 0.05, "grad_norm": 1.3079496275864055, "learning_rate": 9.984247316008484e-06, "loss": 0.6634, "step": 852 }, { "epoch": 0.05, "grad_norm": 1.4297016893079166, "learning_rate": 9.984164992677215e-06, "loss": 0.7117, "step": 853 }, { "epoch": 0.05, "grad_norm": 2.3291815880394435, "learning_rate": 9.984082455137007e-06, "loss": 0.847, "step": 854 }, { "epoch": 0.05, "grad_norm": 1.935220403109089, "learning_rate": 9.983999703391408e-06, "loss": 0.7978, "step": 855 }, { "epoch": 0.05, "grad_norm": 1.6090474462941708, "learning_rate": 9.983916737443973e-06, "loss": 0.8149, "step": 856 }, { "epoch": 0.05, "grad_norm": 1.7711225245604545, "learning_rate": 9.98383355729827e-06, "loss": 0.8223, "step": 857 }, { "epoch": 0.05, "grad_norm": 1.34215927866768, "learning_rate": 9.983750162957874e-06, "loss": 0.6369, "step": 858 }, { "epoch": 0.05, "grad_norm": 1.6606345533573692, "learning_rate": 9.983666554426367e-06, "loss": 0.7453, "step": 859 }, { "epoch": 0.06, "grad_norm": 1.7253265793834052, "learning_rate": 9.983582731707346e-06, "loss": 0.8816, "step": 860 }, { "epoch": 0.06, "grad_norm": 1.7004016394812722, "learning_rate": 9.98349869480441e-06, "loss": 0.7653, "step": 861 }, { "epoch": 0.06, "grad_norm": 2.0836504895521344, "learning_rate": 9.98341444372117e-06, "loss": 0.8048, "step": 862 }, { "epoch": 0.06, "grad_norm": 1.649949313310509, "learning_rate": 9.983329978461252e-06, "loss": 0.7932, "step": 863 }, { "epoch": 0.06, "grad_norm": 1.864089650516792, "learning_rate": 9.983245299028281e-06, "loss": 0.8072, "step": 864 }, { "epoch": 0.06, "grad_norm": 1.721167012242536, "learning_rate": 9.9831604054259e-06, "loss": 0.8328, "step": 865 }, { "epoch": 0.06, "grad_norm": 1.8544418420151518, "learning_rate": 9.983075297657753e-06, "loss": 0.7829, "step": 866 }, { "epoch": 0.06, "grad_norm": 2.1198069039159493, "learning_rate": 9.982989975727502e-06, "loss": 0.7057, "step": 867 }, { "epoch": 0.06, "grad_norm": 1.7403726346127193, "learning_rate": 9.982904439638814e-06, "loss": 0.7612, "step": 868 }, { "epoch": 0.06, "grad_norm": 2.335618677990451, "learning_rate": 9.982818689395362e-06, "loss": 0.8043, "step": 869 }, { "epoch": 0.06, "grad_norm": 1.3785603455607496, "learning_rate": 9.982732725000834e-06, "loss": 0.6178, "step": 870 }, { "epoch": 0.06, "grad_norm": 2.1631409019434016, "learning_rate": 9.982646546458922e-06, "loss": 0.8165, "step": 871 }, { "epoch": 0.06, "grad_norm": 2.4416695541169897, "learning_rate": 9.982560153773333e-06, "loss": 0.8545, "step": 872 }, { "epoch": 0.06, "grad_norm": 1.544923570219026, "learning_rate": 9.982473546947777e-06, "loss": 0.7866, "step": 873 }, { "epoch": 0.06, "grad_norm": 2.0031749533494243, "learning_rate": 9.982386725985979e-06, "loss": 0.8421, "step": 874 }, { "epoch": 0.06, "grad_norm": 2.0965506619668393, "learning_rate": 9.982299690891668e-06, "loss": 0.6921, "step": 875 }, { "epoch": 0.06, "grad_norm": 1.8272760409451145, "learning_rate": 9.982212441668586e-06, "loss": 0.7736, "step": 876 }, { "epoch": 0.06, "grad_norm": 1.13878466052574, "learning_rate": 9.982124978320482e-06, "loss": 0.7011, "step": 877 }, { "epoch": 0.06, "grad_norm": 2.6056032535500444, "learning_rate": 9.982037300851117e-06, "loss": 0.7788, "step": 878 }, { "epoch": 0.06, "grad_norm": 1.7344405261428644, "learning_rate": 9.981949409264256e-06, "loss": 0.7374, "step": 879 }, { "epoch": 0.06, "grad_norm": 1.7876810779571652, "learning_rate": 9.981861303563678e-06, "loss": 0.8939, "step": 880 }, { "epoch": 0.06, "grad_norm": 1.584327424787609, "learning_rate": 9.981772983753169e-06, "loss": 0.7408, "step": 881 }, { "epoch": 0.06, "grad_norm": 1.8236129083998216, "learning_rate": 9.981684449836526e-06, "loss": 0.7773, "step": 882 }, { "epoch": 0.06, "grad_norm": 1.7704233017958437, "learning_rate": 9.981595701817552e-06, "loss": 0.813, "step": 883 }, { "epoch": 0.06, "grad_norm": 1.812518240614498, "learning_rate": 9.981506739700063e-06, "loss": 0.8552, "step": 884 }, { "epoch": 0.06, "grad_norm": 1.9521749957205, "learning_rate": 9.981417563487884e-06, "loss": 0.9149, "step": 885 }, { "epoch": 0.06, "grad_norm": 1.6328327796861648, "learning_rate": 9.981328173184845e-06, "loss": 0.7736, "step": 886 }, { "epoch": 0.06, "grad_norm": 3.8602945385042595, "learning_rate": 9.981238568794787e-06, "loss": 0.9247, "step": 887 }, { "epoch": 0.06, "grad_norm": 1.8996443014687983, "learning_rate": 9.981148750321563e-06, "loss": 0.8465, "step": 888 }, { "epoch": 0.06, "grad_norm": 2.028836638491653, "learning_rate": 9.981058717769031e-06, "loss": 0.8824, "step": 889 }, { "epoch": 0.06, "grad_norm": 1.6664026542388413, "learning_rate": 9.980968471141064e-06, "loss": 0.8472, "step": 890 }, { "epoch": 0.06, "grad_norm": 1.559074189244838, "learning_rate": 9.980878010441539e-06, "loss": 0.7771, "step": 891 }, { "epoch": 0.06, "grad_norm": 1.8945705536925428, "learning_rate": 9.980787335674341e-06, "loss": 0.7506, "step": 892 }, { "epoch": 0.06, "grad_norm": 1.6430270862377452, "learning_rate": 9.980696446843371e-06, "loss": 0.8531, "step": 893 }, { "epoch": 0.06, "grad_norm": 2.212177128013236, "learning_rate": 9.980605343952534e-06, "loss": 0.7421, "step": 894 }, { "epoch": 0.06, "grad_norm": 1.034012216752867, "learning_rate": 9.980514027005743e-06, "loss": 0.5736, "step": 895 }, { "epoch": 0.06, "grad_norm": 2.11485128646907, "learning_rate": 9.980422496006926e-06, "loss": 0.7745, "step": 896 }, { "epoch": 0.06, "grad_norm": 1.8248238014744422, "learning_rate": 9.980330750960014e-06, "loss": 0.821, "step": 897 }, { "epoch": 0.06, "grad_norm": 1.8874678531504199, "learning_rate": 9.980238791868955e-06, "loss": 0.7938, "step": 898 }, { "epoch": 0.06, "grad_norm": 1.724323346203904, "learning_rate": 9.980146618737694e-06, "loss": 0.8656, "step": 899 }, { "epoch": 0.06, "grad_norm": 1.680485425665482, "learning_rate": 9.980054231570197e-06, "loss": 0.8994, "step": 900 }, { "epoch": 0.06, "grad_norm": 1.2539762024234902, "learning_rate": 9.979961630370435e-06, "loss": 0.6834, "step": 901 }, { "epoch": 0.06, "grad_norm": 1.6587709928792294, "learning_rate": 9.979868815142385e-06, "loss": 0.8607, "step": 902 }, { "epoch": 0.06, "grad_norm": 1.660808030736601, "learning_rate": 9.979775785890037e-06, "loss": 0.7632, "step": 903 }, { "epoch": 0.06, "grad_norm": 1.7194602880052017, "learning_rate": 9.979682542617393e-06, "loss": 0.8325, "step": 904 }, { "epoch": 0.06, "grad_norm": 1.512573082515266, "learning_rate": 9.979589085328454e-06, "loss": 0.7285, "step": 905 }, { "epoch": 0.06, "grad_norm": 1.8595539190096582, "learning_rate": 9.979495414027241e-06, "loss": 0.8972, "step": 906 }, { "epoch": 0.06, "grad_norm": 1.747822528305638, "learning_rate": 9.979401528717777e-06, "loss": 0.9206, "step": 907 }, { "epoch": 0.06, "grad_norm": 1.237202734611535, "learning_rate": 9.9793074294041e-06, "loss": 0.7132, "step": 908 }, { "epoch": 0.06, "grad_norm": 1.8880266666356773, "learning_rate": 9.979213116090251e-06, "loss": 0.9382, "step": 909 }, { "epoch": 0.06, "grad_norm": 1.7942606237383742, "learning_rate": 9.979118588780286e-06, "loss": 0.7715, "step": 910 }, { "epoch": 0.06, "grad_norm": 2.1904560237142214, "learning_rate": 9.979023847478268e-06, "loss": 0.7335, "step": 911 }, { "epoch": 0.06, "grad_norm": 1.8299568096825518, "learning_rate": 9.978928892188265e-06, "loss": 0.7281, "step": 912 }, { "epoch": 0.06, "grad_norm": 1.4939422007046923, "learning_rate": 9.978833722914362e-06, "loss": 0.7295, "step": 913 }, { "epoch": 0.06, "grad_norm": 1.6442223205652708, "learning_rate": 9.978738339660648e-06, "loss": 0.7863, "step": 914 }, { "epoch": 0.06, "grad_norm": 2.027575323204377, "learning_rate": 9.97864274243122e-06, "loss": 0.8574, "step": 915 }, { "epoch": 0.06, "grad_norm": 1.8471711312150574, "learning_rate": 9.97854693123019e-06, "loss": 0.818, "step": 916 }, { "epoch": 0.06, "grad_norm": 2.553287868855059, "learning_rate": 9.978450906061673e-06, "loss": 0.7479, "step": 917 }, { "epoch": 0.06, "grad_norm": 1.8426039337639593, "learning_rate": 9.978354666929797e-06, "loss": 0.7907, "step": 918 }, { "epoch": 0.06, "grad_norm": 1.3057729660575756, "learning_rate": 9.978258213838701e-06, "loss": 0.6456, "step": 919 }, { "epoch": 0.06, "grad_norm": 1.9073358057414, "learning_rate": 9.978161546792526e-06, "loss": 0.7905, "step": 920 }, { "epoch": 0.06, "grad_norm": 1.739600533215442, "learning_rate": 9.978064665795429e-06, "loss": 0.7867, "step": 921 }, { "epoch": 0.06, "grad_norm": 1.9988821019859582, "learning_rate": 9.977967570851571e-06, "loss": 0.8279, "step": 922 }, { "epoch": 0.06, "grad_norm": 1.7025537988100636, "learning_rate": 9.977870261965128e-06, "loss": 0.8551, "step": 923 }, { "epoch": 0.06, "grad_norm": 1.7434676703686434, "learning_rate": 9.97777273914028e-06, "loss": 0.8433, "step": 924 }, { "epoch": 0.06, "grad_norm": 3.0867828904902925, "learning_rate": 9.977675002381222e-06, "loss": 0.8601, "step": 925 }, { "epoch": 0.06, "grad_norm": 1.291048328002017, "learning_rate": 9.97757705169215e-06, "loss": 0.6685, "step": 926 }, { "epoch": 0.06, "grad_norm": 2.3698665062856916, "learning_rate": 9.977478887077277e-06, "loss": 0.8052, "step": 927 }, { "epoch": 0.06, "grad_norm": 1.296057663000789, "learning_rate": 9.97738050854082e-06, "loss": 0.6841, "step": 928 }, { "epoch": 0.06, "grad_norm": 1.9783352414390911, "learning_rate": 9.977281916087008e-06, "loss": 0.834, "step": 929 }, { "epoch": 0.06, "grad_norm": 1.1871850883877377, "learning_rate": 9.977183109720078e-06, "loss": 0.6548, "step": 930 }, { "epoch": 0.06, "grad_norm": 1.6650762431837502, "learning_rate": 9.977084089444276e-06, "loss": 0.762, "step": 931 }, { "epoch": 0.06, "grad_norm": 1.5191510429633672, "learning_rate": 9.976984855263859e-06, "loss": 0.7787, "step": 932 }, { "epoch": 0.06, "grad_norm": 1.1949026033384544, "learning_rate": 9.976885407183088e-06, "loss": 0.6949, "step": 933 }, { "epoch": 0.06, "grad_norm": 2.1265182848023776, "learning_rate": 9.976785745206243e-06, "loss": 0.8059, "step": 934 }, { "epoch": 0.06, "grad_norm": 1.5023177144506585, "learning_rate": 9.976685869337603e-06, "loss": 0.7638, "step": 935 }, { "epoch": 0.06, "grad_norm": 1.859074052313539, "learning_rate": 9.976585779581463e-06, "loss": 0.805, "step": 936 }, { "epoch": 0.06, "grad_norm": 1.9166523289810466, "learning_rate": 9.976485475942123e-06, "loss": 0.7519, "step": 937 }, { "epoch": 0.06, "grad_norm": 1.8364399967598208, "learning_rate": 9.976384958423895e-06, "loss": 0.8313, "step": 938 }, { "epoch": 0.06, "grad_norm": 1.6924655263311008, "learning_rate": 9.976284227031097e-06, "loss": 0.7466, "step": 939 }, { "epoch": 0.06, "grad_norm": 1.5588838056577292, "learning_rate": 9.976183281768059e-06, "loss": 0.8613, "step": 940 }, { "epoch": 0.06, "grad_norm": 1.6977845314663331, "learning_rate": 9.976082122639122e-06, "loss": 0.8296, "step": 941 }, { "epoch": 0.06, "grad_norm": 1.7000619421604357, "learning_rate": 9.97598074964863e-06, "loss": 0.7738, "step": 942 }, { "epoch": 0.06, "grad_norm": 1.4903123648402123, "learning_rate": 9.975879162800942e-06, "loss": 0.6647, "step": 943 }, { "epoch": 0.06, "grad_norm": 1.802265542799413, "learning_rate": 9.975777362100423e-06, "loss": 0.8019, "step": 944 }, { "epoch": 0.06, "grad_norm": 1.4119184121298731, "learning_rate": 9.975675347551449e-06, "loss": 0.6341, "step": 945 }, { "epoch": 0.06, "grad_norm": 1.8640825540932249, "learning_rate": 9.975573119158402e-06, "loss": 0.8759, "step": 946 }, { "epoch": 0.06, "grad_norm": 1.2220188922917476, "learning_rate": 9.975470676925681e-06, "loss": 0.7469, "step": 947 }, { "epoch": 0.06, "grad_norm": 1.9217918232903124, "learning_rate": 9.975368020857682e-06, "loss": 0.812, "step": 948 }, { "epoch": 0.06, "grad_norm": 1.6279138257404124, "learning_rate": 9.975265150958822e-06, "loss": 0.8533, "step": 949 }, { "epoch": 0.06, "grad_norm": 1.8927159890449348, "learning_rate": 9.97516206723352e-06, "loss": 0.8734, "step": 950 }, { "epoch": 0.06, "grad_norm": 1.4644194032626043, "learning_rate": 9.975058769686205e-06, "loss": 0.759, "step": 951 }, { "epoch": 0.06, "grad_norm": 1.6480380001173076, "learning_rate": 9.974955258321319e-06, "loss": 0.812, "step": 952 }, { "epoch": 0.06, "grad_norm": 1.162204794114099, "learning_rate": 9.97485153314331e-06, "loss": 0.7308, "step": 953 }, { "epoch": 0.06, "grad_norm": 1.6048716908486744, "learning_rate": 9.974747594156638e-06, "loss": 0.7845, "step": 954 }, { "epoch": 0.06, "grad_norm": 1.9655482433036815, "learning_rate": 9.974643441365765e-06, "loss": 0.8283, "step": 955 }, { "epoch": 0.06, "grad_norm": 1.9679836566575868, "learning_rate": 9.97453907477517e-06, "loss": 0.8041, "step": 956 }, { "epoch": 0.06, "grad_norm": 2.0304471187141337, "learning_rate": 9.97443449438934e-06, "loss": 0.7962, "step": 957 }, { "epoch": 0.06, "grad_norm": 1.7053774352521933, "learning_rate": 9.974329700212767e-06, "loss": 0.8732, "step": 958 }, { "epoch": 0.06, "grad_norm": 1.7620154923279865, "learning_rate": 9.974224692249956e-06, "loss": 0.8367, "step": 959 }, { "epoch": 0.06, "grad_norm": 1.7965079292238368, "learning_rate": 9.974119470505422e-06, "loss": 0.7761, "step": 960 }, { "epoch": 0.06, "grad_norm": 1.7558228553384074, "learning_rate": 9.974014034983684e-06, "loss": 0.7876, "step": 961 }, { "epoch": 0.06, "grad_norm": 1.209097773999467, "learning_rate": 9.973908385689273e-06, "loss": 0.7725, "step": 962 }, { "epoch": 0.06, "grad_norm": 2.4951339369257246, "learning_rate": 9.973802522626731e-06, "loss": 0.696, "step": 963 }, { "epoch": 0.06, "grad_norm": 1.5212018106024245, "learning_rate": 9.973696445800611e-06, "loss": 0.7429, "step": 964 }, { "epoch": 0.06, "grad_norm": 1.6461596785136001, "learning_rate": 9.973590155215467e-06, "loss": 0.9428, "step": 965 }, { "epoch": 0.06, "grad_norm": 1.6905890079767727, "learning_rate": 9.97348365087587e-06, "loss": 0.8171, "step": 966 }, { "epoch": 0.06, "grad_norm": 1.8703227778681335, "learning_rate": 9.973376932786396e-06, "loss": 0.8586, "step": 967 }, { "epoch": 0.06, "grad_norm": 1.9199712092949013, "learning_rate": 9.97327000095163e-06, "loss": 0.8754, "step": 968 }, { "epoch": 0.06, "grad_norm": 1.7156572873210767, "learning_rate": 9.973162855376173e-06, "loss": 0.8867, "step": 969 }, { "epoch": 0.06, "grad_norm": 1.0193019454622512, "learning_rate": 9.973055496064625e-06, "loss": 0.5123, "step": 970 }, { "epoch": 0.06, "grad_norm": 2.863176020546753, "learning_rate": 9.972947923021602e-06, "loss": 0.8613, "step": 971 }, { "epoch": 0.06, "grad_norm": 1.6078011191929287, "learning_rate": 9.972840136251728e-06, "loss": 0.7053, "step": 972 }, { "epoch": 0.06, "grad_norm": 1.750139984697171, "learning_rate": 9.972732135759631e-06, "loss": 0.7855, "step": 973 }, { "epoch": 0.06, "grad_norm": 1.893774894993657, "learning_rate": 9.972623921549957e-06, "loss": 0.7819, "step": 974 }, { "epoch": 0.06, "grad_norm": 1.711605760760621, "learning_rate": 9.972515493627357e-06, "loss": 0.8063, "step": 975 }, { "epoch": 0.06, "grad_norm": 1.6942019553205752, "learning_rate": 9.97240685199649e-06, "loss": 0.8791, "step": 976 }, { "epoch": 0.06, "grad_norm": 1.5774465958417527, "learning_rate": 9.972297996662023e-06, "loss": 0.8127, "step": 977 }, { "epoch": 0.06, "grad_norm": 2.182424347577263, "learning_rate": 9.972188927628638e-06, "loss": 0.8829, "step": 978 }, { "epoch": 0.06, "grad_norm": 1.8547449553127224, "learning_rate": 9.972079644901021e-06, "loss": 0.7726, "step": 979 }, { "epoch": 0.06, "grad_norm": 1.9071248937893595, "learning_rate": 9.971970148483868e-06, "loss": 0.6785, "step": 980 }, { "epoch": 0.06, "grad_norm": 1.2742089146127478, "learning_rate": 9.971860438381886e-06, "loss": 0.666, "step": 981 }, { "epoch": 0.06, "grad_norm": 2.184813042375622, "learning_rate": 9.97175051459979e-06, "loss": 0.7587, "step": 982 }, { "epoch": 0.06, "grad_norm": 1.6153942895216418, "learning_rate": 9.971640377142301e-06, "loss": 0.7268, "step": 983 }, { "epoch": 0.06, "grad_norm": 1.760948023122805, "learning_rate": 9.97153002601416e-06, "loss": 0.8036, "step": 984 }, { "epoch": 0.06, "grad_norm": 1.772057183329596, "learning_rate": 9.971419461220102e-06, "loss": 0.7843, "step": 985 }, { "epoch": 0.06, "grad_norm": 1.6983752604875413, "learning_rate": 9.971308682764884e-06, "loss": 0.7907, "step": 986 }, { "epoch": 0.06, "grad_norm": 1.4572386160660538, "learning_rate": 9.971197690653263e-06, "loss": 0.7639, "step": 987 }, { "epoch": 0.06, "grad_norm": 1.6118300011519704, "learning_rate": 9.971086484890013e-06, "loss": 0.7264, "step": 988 }, { "epoch": 0.06, "grad_norm": 1.5709537082056608, "learning_rate": 9.970975065479909e-06, "loss": 0.8005, "step": 989 }, { "epoch": 0.06, "grad_norm": 1.5155984773689979, "learning_rate": 9.970863432427745e-06, "loss": 0.713, "step": 990 }, { "epoch": 0.06, "grad_norm": 1.7377017911891726, "learning_rate": 9.970751585738315e-06, "loss": 0.8433, "step": 991 }, { "epoch": 0.06, "grad_norm": 2.246212525838097, "learning_rate": 9.970639525416425e-06, "loss": 0.8386, "step": 992 }, { "epoch": 0.06, "grad_norm": 1.7752655372130988, "learning_rate": 9.970527251466895e-06, "loss": 0.8012, "step": 993 }, { "epoch": 0.06, "grad_norm": 1.2668862201417495, "learning_rate": 9.970414763894548e-06, "loss": 0.7033, "step": 994 }, { "epoch": 0.06, "grad_norm": 1.692388810949796, "learning_rate": 9.970302062704218e-06, "loss": 0.8189, "step": 995 }, { "epoch": 0.06, "grad_norm": 1.2606646122573721, "learning_rate": 9.970189147900751e-06, "loss": 0.6702, "step": 996 }, { "epoch": 0.06, "grad_norm": 1.5928050374000506, "learning_rate": 9.970076019488997e-06, "loss": 0.9424, "step": 997 }, { "epoch": 0.06, "grad_norm": 1.5969275381448285, "learning_rate": 9.96996267747382e-06, "loss": 0.6672, "step": 998 }, { "epoch": 0.06, "grad_norm": 1.7323502281575247, "learning_rate": 9.969849121860089e-06, "loss": 0.7987, "step": 999 }, { "epoch": 0.06, "grad_norm": 1.920784871731263, "learning_rate": 9.969735352652685e-06, "loss": 0.7411, "step": 1000 }, { "epoch": 0.06, "grad_norm": 1.8321994188377053, "learning_rate": 9.969621369856502e-06, "loss": 0.8502, "step": 1001 }, { "epoch": 0.06, "grad_norm": 1.8050186038706368, "learning_rate": 9.969507173476431e-06, "loss": 0.8215, "step": 1002 }, { "epoch": 0.06, "grad_norm": 1.5655090681369535, "learning_rate": 9.969392763517387e-06, "loss": 0.7906, "step": 1003 }, { "epoch": 0.06, "grad_norm": 1.6225863067760304, "learning_rate": 9.969278139984283e-06, "loss": 0.8141, "step": 1004 }, { "epoch": 0.06, "grad_norm": 1.3517876963487538, "learning_rate": 9.969163302882048e-06, "loss": 0.8045, "step": 1005 }, { "epoch": 0.06, "grad_norm": 1.1980795211584452, "learning_rate": 9.969048252215614e-06, "loss": 0.6244, "step": 1006 }, { "epoch": 0.06, "grad_norm": 1.7552513377662913, "learning_rate": 9.968932987989927e-06, "loss": 0.788, "step": 1007 }, { "epoch": 0.06, "grad_norm": 1.7984321598667672, "learning_rate": 9.968817510209943e-06, "loss": 0.8213, "step": 1008 }, { "epoch": 0.06, "grad_norm": 1.7619735054591084, "learning_rate": 9.968701818880624e-06, "loss": 0.761, "step": 1009 }, { "epoch": 0.06, "grad_norm": 1.5751985239745427, "learning_rate": 9.96858591400694e-06, "loss": 0.7977, "step": 1010 }, { "epoch": 0.06, "grad_norm": 1.8549202334321389, "learning_rate": 9.968469795593874e-06, "loss": 0.8294, "step": 1011 }, { "epoch": 0.06, "grad_norm": 11.918624539478834, "learning_rate": 9.968353463646417e-06, "loss": 0.8348, "step": 1012 }, { "epoch": 0.06, "grad_norm": 1.359964957536188, "learning_rate": 9.968236918169567e-06, "loss": 0.761, "step": 1013 }, { "epoch": 0.06, "grad_norm": 1.7592284621527632, "learning_rate": 9.968120159168335e-06, "loss": 0.8774, "step": 1014 }, { "epoch": 0.06, "grad_norm": 1.6502989197752436, "learning_rate": 9.968003186647737e-06, "loss": 0.7527, "step": 1015 }, { "epoch": 0.07, "grad_norm": 1.6418984128945622, "learning_rate": 9.967886000612801e-06, "loss": 0.7975, "step": 1016 }, { "epoch": 0.07, "grad_norm": 3.0608603685708564, "learning_rate": 9.967768601068566e-06, "loss": 0.7914, "step": 1017 }, { "epoch": 0.07, "grad_norm": 1.7056423465812403, "learning_rate": 9.967650988020073e-06, "loss": 0.7712, "step": 1018 }, { "epoch": 0.07, "grad_norm": 1.7201527451325045, "learning_rate": 9.96753316147238e-06, "loss": 0.8236, "step": 1019 }, { "epoch": 0.07, "grad_norm": 1.9611189652631817, "learning_rate": 9.96741512143055e-06, "loss": 0.7624, "step": 1020 }, { "epoch": 0.07, "grad_norm": 1.4851340973650697, "learning_rate": 9.967296867899656e-06, "loss": 0.6902, "step": 1021 }, { "epoch": 0.07, "grad_norm": 1.823229862870745, "learning_rate": 9.96717840088478e-06, "loss": 0.8049, "step": 1022 }, { "epoch": 0.07, "grad_norm": 1.6440414965179841, "learning_rate": 9.967059720391014e-06, "loss": 0.7553, "step": 1023 }, { "epoch": 0.07, "grad_norm": 1.9250127054765982, "learning_rate": 9.966940826423459e-06, "loss": 0.8414, "step": 1024 }, { "epoch": 0.07, "grad_norm": 1.3499167624390904, "learning_rate": 9.966821718987222e-06, "loss": 0.7979, "step": 1025 }, { "epoch": 0.07, "grad_norm": 1.9161141378796456, "learning_rate": 9.966702398087426e-06, "loss": 0.7605, "step": 1026 }, { "epoch": 0.07, "grad_norm": 2.2003765718641977, "learning_rate": 9.966582863729198e-06, "loss": 0.7628, "step": 1027 }, { "epoch": 0.07, "grad_norm": 1.575721983288062, "learning_rate": 9.966463115917676e-06, "loss": 0.8003, "step": 1028 }, { "epoch": 0.07, "grad_norm": 1.6872928130441491, "learning_rate": 9.966343154658002e-06, "loss": 0.8005, "step": 1029 }, { "epoch": 0.07, "grad_norm": 1.2843476990053713, "learning_rate": 9.966222979955335e-06, "loss": 0.5976, "step": 1030 }, { "epoch": 0.07, "grad_norm": 1.611900742486226, "learning_rate": 9.966102591814842e-06, "loss": 0.7374, "step": 1031 }, { "epoch": 0.07, "grad_norm": 2.4096062565233014, "learning_rate": 9.965981990241694e-06, "loss": 0.7185, "step": 1032 }, { "epoch": 0.07, "grad_norm": 1.2899156120816249, "learning_rate": 9.965861175241075e-06, "loss": 0.6923, "step": 1033 }, { "epoch": 0.07, "grad_norm": 1.646374765326677, "learning_rate": 9.965740146818177e-06, "loss": 0.8034, "step": 1034 }, { "epoch": 0.07, "grad_norm": 1.6714545583204408, "learning_rate": 9.965618904978203e-06, "loss": 0.7471, "step": 1035 }, { "epoch": 0.07, "grad_norm": 1.5997801171289365, "learning_rate": 9.965497449726363e-06, "loss": 0.724, "step": 1036 }, { "epoch": 0.07, "grad_norm": 1.960884123349882, "learning_rate": 9.965375781067874e-06, "loss": 0.8733, "step": 1037 }, { "epoch": 0.07, "grad_norm": 1.6278577181670926, "learning_rate": 9.965253899007969e-06, "loss": 0.8019, "step": 1038 }, { "epoch": 0.07, "grad_norm": 1.7530754070299037, "learning_rate": 9.965131803551885e-06, "loss": 0.7939, "step": 1039 }, { "epoch": 0.07, "grad_norm": 2.15460056507458, "learning_rate": 9.96500949470487e-06, "loss": 0.7998, "step": 1040 }, { "epoch": 0.07, "grad_norm": 1.4195656515375856, "learning_rate": 9.96488697247218e-06, "loss": 0.6727, "step": 1041 }, { "epoch": 0.07, "grad_norm": 1.974452397630562, "learning_rate": 9.964764236859079e-06, "loss": 0.913, "step": 1042 }, { "epoch": 0.07, "grad_norm": 1.9060573733696045, "learning_rate": 9.964641287870844e-06, "loss": 0.7413, "step": 1043 }, { "epoch": 0.07, "grad_norm": 1.5264996812621214, "learning_rate": 9.96451812551276e-06, "loss": 0.7524, "step": 1044 }, { "epoch": 0.07, "grad_norm": 1.7307052176870987, "learning_rate": 9.964394749790117e-06, "loss": 0.7477, "step": 1045 }, { "epoch": 0.07, "grad_norm": 1.1685522548566523, "learning_rate": 9.96427116070822e-06, "loss": 0.645, "step": 1046 }, { "epoch": 0.07, "grad_norm": 1.5598547225145147, "learning_rate": 9.964147358272379e-06, "loss": 0.7961, "step": 1047 }, { "epoch": 0.07, "grad_norm": 2.4581687689790006, "learning_rate": 9.964023342487916e-06, "loss": 0.7726, "step": 1048 }, { "epoch": 0.07, "grad_norm": 1.560382907370058, "learning_rate": 9.963899113360161e-06, "loss": 0.6989, "step": 1049 }, { "epoch": 0.07, "grad_norm": 1.5987629248349635, "learning_rate": 9.963774670894453e-06, "loss": 0.835, "step": 1050 }, { "epoch": 0.07, "grad_norm": 1.5349634073135987, "learning_rate": 9.96365001509614e-06, "loss": 0.7071, "step": 1051 }, { "epoch": 0.07, "grad_norm": 1.848281126483787, "learning_rate": 9.963525145970579e-06, "loss": 0.8316, "step": 1052 }, { "epoch": 0.07, "grad_norm": 1.699204953579773, "learning_rate": 9.963400063523136e-06, "loss": 0.7957, "step": 1053 }, { "epoch": 0.07, "grad_norm": 3.199519071782016, "learning_rate": 9.96327476775919e-06, "loss": 0.8867, "step": 1054 }, { "epoch": 0.07, "grad_norm": 1.7191737149204835, "learning_rate": 9.963149258684122e-06, "loss": 0.7788, "step": 1055 }, { "epoch": 0.07, "grad_norm": 1.7179450913104455, "learning_rate": 9.96302353630333e-06, "loss": 0.7505, "step": 1056 }, { "epoch": 0.07, "grad_norm": 2.15531910446281, "learning_rate": 9.962897600622212e-06, "loss": 0.8, "step": 1057 }, { "epoch": 0.07, "grad_norm": 1.864099456575368, "learning_rate": 9.962771451646187e-06, "loss": 0.8074, "step": 1058 }, { "epoch": 0.07, "grad_norm": 1.1922652650029255, "learning_rate": 9.962645089380672e-06, "loss": 0.6553, "step": 1059 }, { "epoch": 0.07, "grad_norm": 1.7963045601456857, "learning_rate": 9.962518513831096e-06, "loss": 0.8258, "step": 1060 }, { "epoch": 0.07, "grad_norm": 1.5137023793517184, "learning_rate": 9.962391725002906e-06, "loss": 0.7029, "step": 1061 }, { "epoch": 0.07, "grad_norm": 1.680563839751371, "learning_rate": 9.962264722901545e-06, "loss": 0.7972, "step": 1062 }, { "epoch": 0.07, "grad_norm": 1.6516263473426431, "learning_rate": 9.962137507532474e-06, "loss": 0.786, "step": 1063 }, { "epoch": 0.07, "grad_norm": 1.8136187245173037, "learning_rate": 9.962010078901161e-06, "loss": 0.7969, "step": 1064 }, { "epoch": 0.07, "grad_norm": 1.8266689724362635, "learning_rate": 9.961882437013079e-06, "loss": 0.7378, "step": 1065 }, { "epoch": 0.07, "grad_norm": 1.7841412156398078, "learning_rate": 9.961754581873717e-06, "loss": 0.8479, "step": 1066 }, { "epoch": 0.07, "grad_norm": 1.6961727945011782, "learning_rate": 9.961626513488572e-06, "loss": 0.7693, "step": 1067 }, { "epoch": 0.07, "grad_norm": 1.8270895993815723, "learning_rate": 9.961498231863142e-06, "loss": 0.7268, "step": 1068 }, { "epoch": 0.07, "grad_norm": 1.8630833632888943, "learning_rate": 9.961369737002943e-06, "loss": 0.8149, "step": 1069 }, { "epoch": 0.07, "grad_norm": 1.6138338848266776, "learning_rate": 9.9612410289135e-06, "loss": 0.7626, "step": 1070 }, { "epoch": 0.07, "grad_norm": 1.8004478790900855, "learning_rate": 9.961112107600343e-06, "loss": 0.7827, "step": 1071 }, { "epoch": 0.07, "grad_norm": 1.9810486311079631, "learning_rate": 9.960982973069011e-06, "loss": 0.8418, "step": 1072 }, { "epoch": 0.07, "grad_norm": 1.9157538426128713, "learning_rate": 9.960853625325056e-06, "loss": 0.8572, "step": 1073 }, { "epoch": 0.07, "grad_norm": 1.5623156653638275, "learning_rate": 9.960724064374035e-06, "loss": 0.7801, "step": 1074 }, { "epoch": 0.07, "grad_norm": 1.59797029675899, "learning_rate": 9.960594290221519e-06, "loss": 0.7808, "step": 1075 }, { "epoch": 0.07, "grad_norm": 1.7665084373021398, "learning_rate": 9.960464302873084e-06, "loss": 0.6663, "step": 1076 }, { "epoch": 0.07, "grad_norm": 1.1121457621805952, "learning_rate": 9.960334102334316e-06, "loss": 0.709, "step": 1077 }, { "epoch": 0.07, "grad_norm": 1.5440001585794, "learning_rate": 9.960203688610813e-06, "loss": 0.7415, "step": 1078 }, { "epoch": 0.07, "grad_norm": 2.570283161360888, "learning_rate": 9.960073061708176e-06, "loss": 0.7324, "step": 1079 }, { "epoch": 0.07, "grad_norm": 1.5567485113673643, "learning_rate": 9.959942221632021e-06, "loss": 0.7437, "step": 1080 }, { "epoch": 0.07, "grad_norm": 2.189257993890257, "learning_rate": 9.959811168387974e-06, "loss": 0.7912, "step": 1081 }, { "epoch": 0.07, "grad_norm": 2.370152677194013, "learning_rate": 9.959679901981663e-06, "loss": 0.8654, "step": 1082 }, { "epoch": 0.07, "grad_norm": 1.523728770735352, "learning_rate": 9.959548422418734e-06, "loss": 0.8644, "step": 1083 }, { "epoch": 0.07, "grad_norm": 3.148943308804703, "learning_rate": 9.959416729704832e-06, "loss": 0.8703, "step": 1084 }, { "epoch": 0.07, "grad_norm": 1.5434011483013363, "learning_rate": 9.959284823845623e-06, "loss": 0.7677, "step": 1085 }, { "epoch": 0.07, "grad_norm": 1.8802879511907205, "learning_rate": 9.959152704846771e-06, "loss": 0.7944, "step": 1086 }, { "epoch": 0.07, "grad_norm": 1.7003060589537913, "learning_rate": 9.959020372713959e-06, "loss": 0.7959, "step": 1087 }, { "epoch": 0.07, "grad_norm": 1.7370777641067103, "learning_rate": 9.95888782745287e-06, "loss": 0.8026, "step": 1088 }, { "epoch": 0.07, "grad_norm": 1.6054850725319165, "learning_rate": 9.958755069069202e-06, "loss": 0.7929, "step": 1089 }, { "epoch": 0.07, "grad_norm": 2.3356093328391476, "learning_rate": 9.95862209756866e-06, "loss": 0.8294, "step": 1090 }, { "epoch": 0.07, "grad_norm": 1.7601633183800116, "learning_rate": 9.958488912956961e-06, "loss": 0.795, "step": 1091 }, { "epoch": 0.07, "grad_norm": 1.496798505954813, "learning_rate": 9.958355515239828e-06, "loss": 0.7658, "step": 1092 }, { "epoch": 0.07, "grad_norm": 3.034837968778576, "learning_rate": 9.958221904422993e-06, "loss": 0.748, "step": 1093 }, { "epoch": 0.07, "grad_norm": 1.8537348872566588, "learning_rate": 9.9580880805122e-06, "loss": 0.8184, "step": 1094 }, { "epoch": 0.07, "grad_norm": 2.0459234746672803, "learning_rate": 9.9579540435132e-06, "loss": 0.7839, "step": 1095 }, { "epoch": 0.07, "grad_norm": 1.6171222240600525, "learning_rate": 9.957819793431754e-06, "loss": 0.7796, "step": 1096 }, { "epoch": 0.07, "grad_norm": 1.900426963549429, "learning_rate": 9.957685330273628e-06, "loss": 0.791, "step": 1097 }, { "epoch": 0.07, "grad_norm": 1.5564355694287642, "learning_rate": 9.957550654044606e-06, "loss": 0.8369, "step": 1098 }, { "epoch": 0.07, "grad_norm": 1.8435439936687874, "learning_rate": 9.957415764750474e-06, "loss": 0.8619, "step": 1099 }, { "epoch": 0.07, "grad_norm": 1.3765224609260172, "learning_rate": 9.957280662397031e-06, "loss": 0.6142, "step": 1100 }, { "epoch": 0.07, "grad_norm": 1.9318395231051473, "learning_rate": 9.957145346990079e-06, "loss": 0.76, "step": 1101 }, { "epoch": 0.07, "grad_norm": 1.710133202675023, "learning_rate": 9.957009818535438e-06, "loss": 0.822, "step": 1102 }, { "epoch": 0.07, "grad_norm": 1.7776448263571023, "learning_rate": 9.95687407703893e-06, "loss": 0.7727, "step": 1103 }, { "epoch": 0.07, "grad_norm": 1.7426742700213766, "learning_rate": 9.95673812250639e-06, "loss": 0.8478, "step": 1104 }, { "epoch": 0.07, "grad_norm": 2.043699842351303, "learning_rate": 9.956601954943664e-06, "loss": 0.9611, "step": 1105 }, { "epoch": 0.07, "grad_norm": 1.1367047679604216, "learning_rate": 9.956465574356598e-06, "loss": 0.6144, "step": 1106 }, { "epoch": 0.07, "grad_norm": 1.666585191227357, "learning_rate": 9.956328980751057e-06, "loss": 0.7574, "step": 1107 }, { "epoch": 0.07, "grad_norm": 1.7278619398123918, "learning_rate": 9.956192174132912e-06, "loss": 0.6696, "step": 1108 }, { "epoch": 0.07, "grad_norm": 1.7373400950947664, "learning_rate": 9.956055154508042e-06, "loss": 0.8614, "step": 1109 }, { "epoch": 0.07, "grad_norm": 1.6529854696932778, "learning_rate": 9.955917921882334e-06, "loss": 0.9042, "step": 1110 }, { "epoch": 0.07, "grad_norm": 1.5232965167609005, "learning_rate": 9.955780476261689e-06, "loss": 0.7652, "step": 1111 }, { "epoch": 0.07, "grad_norm": 1.6852033631420706, "learning_rate": 9.955642817652012e-06, "loss": 0.8016, "step": 1112 }, { "epoch": 0.07, "grad_norm": 1.8723467631265593, "learning_rate": 9.955504946059221e-06, "loss": 0.8042, "step": 1113 }, { "epoch": 0.07, "grad_norm": 1.1640083199400821, "learning_rate": 9.95536686148924e-06, "loss": 0.7454, "step": 1114 }, { "epoch": 0.07, "grad_norm": 1.0380129983280817, "learning_rate": 9.955228563948003e-06, "loss": 0.5346, "step": 1115 }, { "epoch": 0.07, "grad_norm": 1.5352505415370024, "learning_rate": 9.955090053441455e-06, "loss": 0.7755, "step": 1116 }, { "epoch": 0.07, "grad_norm": 1.9733123669624468, "learning_rate": 9.954951329975549e-06, "loss": 0.8303, "step": 1117 }, { "epoch": 0.07, "grad_norm": 1.5820628060874122, "learning_rate": 9.954812393556245e-06, "loss": 0.7358, "step": 1118 }, { "epoch": 0.07, "grad_norm": 1.860162856479875, "learning_rate": 9.954673244189518e-06, "loss": 0.7443, "step": 1119 }, { "epoch": 0.07, "grad_norm": 1.4054308490749456, "learning_rate": 9.954533881881346e-06, "loss": 0.6975, "step": 1120 }, { "epoch": 0.07, "grad_norm": 1.9760139242028396, "learning_rate": 9.954394306637719e-06, "loss": 0.8353, "step": 1121 }, { "epoch": 0.07, "grad_norm": 1.692048031270987, "learning_rate": 9.954254518464633e-06, "loss": 0.8634, "step": 1122 }, { "epoch": 0.07, "grad_norm": 1.6940041505765069, "learning_rate": 9.9541145173681e-06, "loss": 0.75, "step": 1123 }, { "epoch": 0.07, "grad_norm": 1.977380107767541, "learning_rate": 9.953974303354136e-06, "loss": 0.8059, "step": 1124 }, { "epoch": 0.07, "grad_norm": 1.7016014487652305, "learning_rate": 9.953833876428763e-06, "loss": 0.8781, "step": 1125 }, { "epoch": 0.07, "grad_norm": 1.588651813285505, "learning_rate": 9.95369323659802e-06, "loss": 0.8387, "step": 1126 }, { "epoch": 0.07, "grad_norm": 2.1667400807496877, "learning_rate": 9.953552383867953e-06, "loss": 0.7914, "step": 1127 }, { "epoch": 0.07, "grad_norm": 1.5158534652812161, "learning_rate": 9.953411318244612e-06, "loss": 0.8109, "step": 1128 }, { "epoch": 0.07, "grad_norm": 1.707352159999912, "learning_rate": 9.953270039734063e-06, "loss": 0.8516, "step": 1129 }, { "epoch": 0.07, "grad_norm": 2.833428297487808, "learning_rate": 9.953128548342372e-06, "loss": 0.775, "step": 1130 }, { "epoch": 0.07, "grad_norm": 1.4088668420457169, "learning_rate": 9.952986844075628e-06, "loss": 0.71, "step": 1131 }, { "epoch": 0.07, "grad_norm": 1.8832276569626238, "learning_rate": 9.952844926939916e-06, "loss": 0.7537, "step": 1132 }, { "epoch": 0.07, "grad_norm": 1.3007947032733025, "learning_rate": 9.952702796941334e-06, "loss": 0.6506, "step": 1133 }, { "epoch": 0.07, "grad_norm": 4.327116050513365, "learning_rate": 9.952560454085995e-06, "loss": 0.7211, "step": 1134 }, { "epoch": 0.07, "grad_norm": 1.707500099254334, "learning_rate": 9.952417898380015e-06, "loss": 0.8306, "step": 1135 }, { "epoch": 0.07, "grad_norm": 2.1914101846380847, "learning_rate": 9.952275129829518e-06, "loss": 0.8003, "step": 1136 }, { "epoch": 0.07, "grad_norm": 1.9013446394667735, "learning_rate": 9.952132148440644e-06, "loss": 0.7841, "step": 1137 }, { "epoch": 0.07, "grad_norm": 1.632032251339857, "learning_rate": 9.951988954219535e-06, "loss": 0.884, "step": 1138 }, { "epoch": 0.07, "grad_norm": 1.5975182717549823, "learning_rate": 9.951845547172347e-06, "loss": 0.68, "step": 1139 }, { "epoch": 0.07, "grad_norm": 1.8449249469898525, "learning_rate": 9.951701927305244e-06, "loss": 0.806, "step": 1140 }, { "epoch": 0.07, "grad_norm": 1.901596378493549, "learning_rate": 9.951558094624395e-06, "loss": 0.8164, "step": 1141 }, { "epoch": 0.07, "grad_norm": 1.1166777193838908, "learning_rate": 9.951414049135984e-06, "loss": 0.7378, "step": 1142 }, { "epoch": 0.07, "grad_norm": 1.689242372108656, "learning_rate": 9.951269790846204e-06, "loss": 0.8318, "step": 1143 }, { "epoch": 0.07, "grad_norm": 1.419038114746601, "learning_rate": 9.951125319761248e-06, "loss": 0.6679, "step": 1144 }, { "epoch": 0.07, "grad_norm": 1.941713247914093, "learning_rate": 9.950980635887332e-06, "loss": 0.7889, "step": 1145 }, { "epoch": 0.07, "grad_norm": 1.6433131509294976, "learning_rate": 9.950835739230671e-06, "loss": 0.8394, "step": 1146 }, { "epoch": 0.07, "grad_norm": 1.7382570197462783, "learning_rate": 9.950690629797494e-06, "loss": 0.7771, "step": 1147 }, { "epoch": 0.07, "grad_norm": 1.46908218954222, "learning_rate": 9.950545307594037e-06, "loss": 0.7462, "step": 1148 }, { "epoch": 0.07, "grad_norm": 1.469420541796959, "learning_rate": 9.950399772626545e-06, "loss": 0.7614, "step": 1149 }, { "epoch": 0.07, "grad_norm": 1.7845372663846204, "learning_rate": 9.95025402490127e-06, "loss": 0.8045, "step": 1150 }, { "epoch": 0.07, "grad_norm": 1.827039354253548, "learning_rate": 9.950108064424482e-06, "loss": 0.8305, "step": 1151 }, { "epoch": 0.07, "grad_norm": 1.8116641153712327, "learning_rate": 9.94996189120245e-06, "loss": 0.7443, "step": 1152 }, { "epoch": 0.07, "grad_norm": 1.6781960766456137, "learning_rate": 9.949815505241458e-06, "loss": 0.8208, "step": 1153 }, { "epoch": 0.07, "grad_norm": 1.8357658492237041, "learning_rate": 9.949668906547798e-06, "loss": 0.8276, "step": 1154 }, { "epoch": 0.07, "grad_norm": 1.0900557362982037, "learning_rate": 9.949522095127765e-06, "loss": 0.6201, "step": 1155 }, { "epoch": 0.07, "grad_norm": 1.0942335509736245, "learning_rate": 9.949375070987676e-06, "loss": 0.6981, "step": 1156 }, { "epoch": 0.07, "grad_norm": 1.6561339256317356, "learning_rate": 9.949227834133845e-06, "loss": 0.7517, "step": 1157 }, { "epoch": 0.07, "grad_norm": 2.866869775218805, "learning_rate": 9.949080384572602e-06, "loss": 0.7184, "step": 1158 }, { "epoch": 0.07, "grad_norm": 1.6793195090531714, "learning_rate": 9.948932722310283e-06, "loss": 0.8581, "step": 1159 }, { "epoch": 0.07, "grad_norm": 1.1332013990287038, "learning_rate": 9.948784847353237e-06, "loss": 0.6433, "step": 1160 }, { "epoch": 0.07, "grad_norm": 2.2287125096698035, "learning_rate": 9.948636759707815e-06, "loss": 0.8087, "step": 1161 }, { "epoch": 0.07, "grad_norm": 1.9922513047613646, "learning_rate": 9.948488459380382e-06, "loss": 0.7525, "step": 1162 }, { "epoch": 0.07, "grad_norm": 2.0843284653062115, "learning_rate": 9.948339946377316e-06, "loss": 0.8381, "step": 1163 }, { "epoch": 0.07, "grad_norm": 1.6825912804223166, "learning_rate": 9.948191220704996e-06, "loss": 0.7425, "step": 1164 }, { "epoch": 0.07, "grad_norm": 1.303418325761962, "learning_rate": 9.948042282369816e-06, "loss": 0.7211, "step": 1165 }, { "epoch": 0.07, "grad_norm": 2.1585047538146407, "learning_rate": 9.947893131378174e-06, "loss": 0.7652, "step": 1166 }, { "epoch": 0.07, "grad_norm": 1.6801806953926153, "learning_rate": 9.947743767736485e-06, "loss": 0.8231, "step": 1167 }, { "epoch": 0.07, "grad_norm": 1.7355364633081918, "learning_rate": 9.947594191451162e-06, "loss": 0.8681, "step": 1168 }, { "epoch": 0.07, "grad_norm": 1.5036875528182982, "learning_rate": 9.947444402528638e-06, "loss": 0.7648, "step": 1169 }, { "epoch": 0.07, "grad_norm": 1.9624428328399333, "learning_rate": 9.947294400975352e-06, "loss": 1.0013, "step": 1170 }, { "epoch": 0.07, "grad_norm": 1.78920661005376, "learning_rate": 9.947144186797747e-06, "loss": 0.8601, "step": 1171 }, { "epoch": 0.08, "grad_norm": 1.687312573421913, "learning_rate": 9.946993760002278e-06, "loss": 0.7667, "step": 1172 }, { "epoch": 0.08, "grad_norm": 3.080980571307982, "learning_rate": 9.946843120595415e-06, "loss": 0.8612, "step": 1173 }, { "epoch": 0.08, "grad_norm": 1.7858736733747134, "learning_rate": 9.946692268583628e-06, "loss": 0.8619, "step": 1174 }, { "epoch": 0.08, "grad_norm": 1.6111648184062541, "learning_rate": 9.946541203973402e-06, "loss": 0.7232, "step": 1175 }, { "epoch": 0.08, "grad_norm": 1.5585611652127132, "learning_rate": 9.94638992677123e-06, "loss": 0.7487, "step": 1176 }, { "epoch": 0.08, "grad_norm": 1.6343265650888825, "learning_rate": 9.94623843698361e-06, "loss": 0.8006, "step": 1177 }, { "epoch": 0.08, "grad_norm": 1.773454217150582, "learning_rate": 9.946086734617058e-06, "loss": 0.8202, "step": 1178 }, { "epoch": 0.08, "grad_norm": 1.7203037793120888, "learning_rate": 9.945934819678092e-06, "loss": 0.7699, "step": 1179 }, { "epoch": 0.08, "grad_norm": 1.8401767925875208, "learning_rate": 9.945782692173239e-06, "loss": 0.757, "step": 1180 }, { "epoch": 0.08, "grad_norm": 1.6294072722008182, "learning_rate": 9.945630352109039e-06, "loss": 0.8173, "step": 1181 }, { "epoch": 0.08, "grad_norm": 1.9067259217344887, "learning_rate": 9.945477799492038e-06, "loss": 0.8047, "step": 1182 }, { "epoch": 0.08, "grad_norm": 1.2452604476375928, "learning_rate": 9.945325034328791e-06, "loss": 0.6973, "step": 1183 }, { "epoch": 0.08, "grad_norm": 1.7285218554817683, "learning_rate": 9.94517205662587e-06, "loss": 0.8251, "step": 1184 }, { "epoch": 0.08, "grad_norm": 1.74846448238479, "learning_rate": 9.945018866389844e-06, "loss": 0.6944, "step": 1185 }, { "epoch": 0.08, "grad_norm": 1.713103350547022, "learning_rate": 9.944865463627295e-06, "loss": 0.7544, "step": 1186 }, { "epoch": 0.08, "grad_norm": 1.1240862474110245, "learning_rate": 9.944711848344822e-06, "loss": 0.7982, "step": 1187 }, { "epoch": 0.08, "grad_norm": 1.4311823471188352, "learning_rate": 9.944558020549024e-06, "loss": 0.7716, "step": 1188 }, { "epoch": 0.08, "grad_norm": 1.8634720110159253, "learning_rate": 9.94440398024651e-06, "loss": 0.7758, "step": 1189 }, { "epoch": 0.08, "grad_norm": 1.6731201840728147, "learning_rate": 9.944249727443904e-06, "loss": 0.8325, "step": 1190 }, { "epoch": 0.08, "grad_norm": 1.6305184377129507, "learning_rate": 9.944095262147835e-06, "loss": 0.7185, "step": 1191 }, { "epoch": 0.08, "grad_norm": 2.0466729604562244, "learning_rate": 9.94394058436494e-06, "loss": 0.7834, "step": 1192 }, { "epoch": 0.08, "grad_norm": 1.76502570342319, "learning_rate": 9.943785694101865e-06, "loss": 0.7513, "step": 1193 }, { "epoch": 0.08, "grad_norm": 1.3599805013104025, "learning_rate": 9.943630591365272e-06, "loss": 0.6696, "step": 1194 }, { "epoch": 0.08, "grad_norm": 1.4057185805210182, "learning_rate": 9.943475276161823e-06, "loss": 0.7532, "step": 1195 }, { "epoch": 0.08, "grad_norm": 1.2311953495357266, "learning_rate": 9.943319748498195e-06, "loss": 0.6457, "step": 1196 }, { "epoch": 0.08, "grad_norm": 2.075889344417564, "learning_rate": 9.943164008381072e-06, "loss": 0.7998, "step": 1197 }, { "epoch": 0.08, "grad_norm": 1.8289753300474163, "learning_rate": 9.943008055817145e-06, "loss": 0.8385, "step": 1198 }, { "epoch": 0.08, "grad_norm": 2.0181246195397557, "learning_rate": 9.94285189081312e-06, "loss": 0.7375, "step": 1199 }, { "epoch": 0.08, "grad_norm": 1.5762076121215178, "learning_rate": 9.942695513375707e-06, "loss": 0.7464, "step": 1200 }, { "epoch": 0.08, "grad_norm": 1.5111170733779946, "learning_rate": 9.942538923511627e-06, "loss": 0.7428, "step": 1201 }, { "epoch": 0.08, "grad_norm": 2.17878117988025, "learning_rate": 9.942382121227608e-06, "loss": 0.8015, "step": 1202 }, { "epoch": 0.08, "grad_norm": 1.6119623742446363, "learning_rate": 9.942225106530391e-06, "loss": 0.8133, "step": 1203 }, { "epoch": 0.08, "grad_norm": 1.8226563105799043, "learning_rate": 9.942067879426727e-06, "loss": 0.8503, "step": 1204 }, { "epoch": 0.08, "grad_norm": 1.616622982869557, "learning_rate": 9.941910439923367e-06, "loss": 0.8194, "step": 1205 }, { "epoch": 0.08, "grad_norm": 2.0378917263914453, "learning_rate": 9.941752788027083e-06, "loss": 0.7868, "step": 1206 }, { "epoch": 0.08, "grad_norm": 2.0481200850643693, "learning_rate": 9.941594923744647e-06, "loss": 0.7416, "step": 1207 }, { "epoch": 0.08, "grad_norm": 1.6430009069063807, "learning_rate": 9.941436847082845e-06, "loss": 0.7424, "step": 1208 }, { "epoch": 0.08, "grad_norm": 1.6477451836442019, "learning_rate": 9.941278558048471e-06, "loss": 0.8084, "step": 1209 }, { "epoch": 0.08, "grad_norm": 1.678220082648698, "learning_rate": 9.941120056648329e-06, "loss": 0.7814, "step": 1210 }, { "epoch": 0.08, "grad_norm": 1.8901707647491564, "learning_rate": 9.940961342889225e-06, "loss": 0.8217, "step": 1211 }, { "epoch": 0.08, "grad_norm": 2.0048330926240294, "learning_rate": 9.94080241677799e-06, "loss": 0.7882, "step": 1212 }, { "epoch": 0.08, "grad_norm": 1.4745306740587227, "learning_rate": 9.940643278321447e-06, "loss": 0.7883, "step": 1213 }, { "epoch": 0.08, "grad_norm": 1.6761003183749128, "learning_rate": 9.940483927526438e-06, "loss": 0.8845, "step": 1214 }, { "epoch": 0.08, "grad_norm": 1.6985775387118394, "learning_rate": 9.94032436439981e-06, "loss": 0.7849, "step": 1215 }, { "epoch": 0.08, "grad_norm": 2.0247900959534313, "learning_rate": 9.940164588948424e-06, "loss": 0.8366, "step": 1216 }, { "epoch": 0.08, "grad_norm": 1.6342422344350944, "learning_rate": 9.940004601179143e-06, "loss": 0.772, "step": 1217 }, { "epoch": 0.08, "grad_norm": 4.937359748532378, "learning_rate": 9.939844401098845e-06, "loss": 0.7858, "step": 1218 }, { "epoch": 0.08, "grad_norm": 1.72734845543254, "learning_rate": 9.939683988714415e-06, "loss": 0.7708, "step": 1219 }, { "epoch": 0.08, "grad_norm": 1.7382671961324305, "learning_rate": 9.939523364032746e-06, "loss": 0.7379, "step": 1220 }, { "epoch": 0.08, "grad_norm": 1.7028896107380764, "learning_rate": 9.939362527060743e-06, "loss": 0.8792, "step": 1221 }, { "epoch": 0.08, "grad_norm": 1.6281437007347392, "learning_rate": 9.939201477805318e-06, "loss": 0.7893, "step": 1222 }, { "epoch": 0.08, "grad_norm": 1.640094749823017, "learning_rate": 9.939040216273392e-06, "loss": 0.7971, "step": 1223 }, { "epoch": 0.08, "grad_norm": 1.170163333554324, "learning_rate": 9.938878742471896e-06, "loss": 0.6503, "step": 1224 }, { "epoch": 0.08, "grad_norm": 1.6054510394104053, "learning_rate": 9.938717056407768e-06, "loss": 0.7673, "step": 1225 }, { "epoch": 0.08, "grad_norm": 1.7201327986167698, "learning_rate": 9.93855515808796e-06, "loss": 0.7175, "step": 1226 }, { "epoch": 0.08, "grad_norm": 1.5045320940797886, "learning_rate": 9.938393047519428e-06, "loss": 0.7819, "step": 1227 }, { "epoch": 0.08, "grad_norm": 1.6837916509289097, "learning_rate": 9.938230724709141e-06, "loss": 0.7814, "step": 1228 }, { "epoch": 0.08, "grad_norm": 1.4764560009598522, "learning_rate": 9.938068189664073e-06, "loss": 0.7219, "step": 1229 }, { "epoch": 0.08, "grad_norm": 1.436705500538068, "learning_rate": 9.937905442391211e-06, "loss": 0.7531, "step": 1230 }, { "epoch": 0.08, "grad_norm": 1.6172052150364173, "learning_rate": 9.937742482897549e-06, "loss": 0.734, "step": 1231 }, { "epoch": 0.08, "grad_norm": 1.8846628824983487, "learning_rate": 9.937579311190092e-06, "loss": 0.8432, "step": 1232 }, { "epoch": 0.08, "grad_norm": 1.9964698150499274, "learning_rate": 9.937415927275848e-06, "loss": 0.8955, "step": 1233 }, { "epoch": 0.08, "grad_norm": 1.5517081559929293, "learning_rate": 9.937252331161845e-06, "loss": 0.7353, "step": 1234 }, { "epoch": 0.08, "grad_norm": 1.5087373617236775, "learning_rate": 9.937088522855111e-06, "loss": 0.5812, "step": 1235 }, { "epoch": 0.08, "grad_norm": 1.9568817956142124, "learning_rate": 9.936924502362687e-06, "loss": 0.7231, "step": 1236 }, { "epoch": 0.08, "grad_norm": 1.9497242236388612, "learning_rate": 9.936760269691621e-06, "loss": 0.8335, "step": 1237 }, { "epoch": 0.08, "grad_norm": 1.3644893576772221, "learning_rate": 9.936595824848972e-06, "loss": 0.727, "step": 1238 }, { "epoch": 0.08, "grad_norm": 1.835221486581115, "learning_rate": 9.936431167841808e-06, "loss": 0.9053, "step": 1239 }, { "epoch": 0.08, "grad_norm": 1.6139103690320111, "learning_rate": 9.936266298677206e-06, "loss": 0.8143, "step": 1240 }, { "epoch": 0.08, "grad_norm": 1.11013169616772, "learning_rate": 9.93610121736225e-06, "loss": 0.6761, "step": 1241 }, { "epoch": 0.08, "grad_norm": 1.7713263673995723, "learning_rate": 9.935935923904037e-06, "loss": 0.8557, "step": 1242 }, { "epoch": 0.08, "grad_norm": 1.747567154607575, "learning_rate": 9.935770418309667e-06, "loss": 0.8153, "step": 1243 }, { "epoch": 0.08, "grad_norm": 1.7440452685124685, "learning_rate": 9.93560470058626e-06, "loss": 0.8936, "step": 1244 }, { "epoch": 0.08, "grad_norm": 1.8822388943040846, "learning_rate": 9.93543877074093e-06, "loss": 0.7644, "step": 1245 }, { "epoch": 0.08, "grad_norm": 1.6888940662225753, "learning_rate": 9.935272628780815e-06, "loss": 0.8323, "step": 1246 }, { "epoch": 0.08, "grad_norm": 1.590349080429308, "learning_rate": 9.935106274713053e-06, "loss": 0.6997, "step": 1247 }, { "epoch": 0.08, "grad_norm": 1.6504010234940556, "learning_rate": 9.934939708544792e-06, "loss": 0.7471, "step": 1248 }, { "epoch": 0.08, "grad_norm": 1.6626744932102357, "learning_rate": 9.934772930283194e-06, "loss": 0.7497, "step": 1249 }, { "epoch": 0.08, "grad_norm": 1.7654016209714574, "learning_rate": 9.934605939935423e-06, "loss": 0.7949, "step": 1250 }, { "epoch": 0.08, "grad_norm": 1.7535859678975798, "learning_rate": 9.934438737508658e-06, "loss": 0.8043, "step": 1251 }, { "epoch": 0.08, "grad_norm": 1.838661443030837, "learning_rate": 9.934271323010085e-06, "loss": 0.7949, "step": 1252 }, { "epoch": 0.08, "grad_norm": 1.6920407216778424, "learning_rate": 9.9341036964469e-06, "loss": 0.6926, "step": 1253 }, { "epoch": 0.08, "grad_norm": 2.042516417008944, "learning_rate": 9.933935857826304e-06, "loss": 0.7247, "step": 1254 }, { "epoch": 0.08, "grad_norm": 1.8395731079468596, "learning_rate": 9.933767807155515e-06, "loss": 0.7439, "step": 1255 }, { "epoch": 0.08, "grad_norm": 1.8744261067343555, "learning_rate": 9.933599544441752e-06, "loss": 0.746, "step": 1256 }, { "epoch": 0.08, "grad_norm": 1.7440714094502083, "learning_rate": 9.933431069692245e-06, "loss": 0.7541, "step": 1257 }, { "epoch": 0.08, "grad_norm": 1.4706013134460827, "learning_rate": 9.933262382914239e-06, "loss": 0.7269, "step": 1258 }, { "epoch": 0.08, "grad_norm": 1.8420089496107785, "learning_rate": 9.933093484114983e-06, "loss": 0.7088, "step": 1259 }, { "epoch": 0.08, "grad_norm": 1.9768406221141246, "learning_rate": 9.932924373301735e-06, "loss": 0.7833, "step": 1260 }, { "epoch": 0.08, "grad_norm": 1.2662643496477584, "learning_rate": 9.932755050481762e-06, "loss": 0.6532, "step": 1261 }, { "epoch": 0.08, "grad_norm": 2.123002299709827, "learning_rate": 9.932585515662341e-06, "loss": 0.7464, "step": 1262 }, { "epoch": 0.08, "grad_norm": 1.6131262508309283, "learning_rate": 9.93241576885076e-06, "loss": 0.7963, "step": 1263 }, { "epoch": 0.08, "grad_norm": 1.2084959071360502, "learning_rate": 9.932245810054315e-06, "loss": 0.5883, "step": 1264 }, { "epoch": 0.08, "grad_norm": 1.8214076128545635, "learning_rate": 9.932075639280308e-06, "loss": 0.78, "step": 1265 }, { "epoch": 0.08, "grad_norm": 1.48180982616666, "learning_rate": 9.931905256536053e-06, "loss": 0.7586, "step": 1266 }, { "epoch": 0.08, "grad_norm": 1.5317908431726932, "learning_rate": 9.931734661828876e-06, "loss": 0.8576, "step": 1267 }, { "epoch": 0.08, "grad_norm": 2.1188774573113682, "learning_rate": 9.931563855166104e-06, "loss": 0.7986, "step": 1268 }, { "epoch": 0.08, "grad_norm": 1.5954411589543958, "learning_rate": 9.931392836555081e-06, "loss": 0.7487, "step": 1269 }, { "epoch": 0.08, "grad_norm": 1.0940735835169852, "learning_rate": 9.931221606003156e-06, "loss": 0.7042, "step": 1270 }, { "epoch": 0.08, "grad_norm": 2.0448021308004143, "learning_rate": 9.931050163517688e-06, "loss": 0.859, "step": 1271 }, { "epoch": 0.08, "grad_norm": 1.8965824691039321, "learning_rate": 9.930878509106046e-06, "loss": 0.7419, "step": 1272 }, { "epoch": 0.08, "grad_norm": 1.7076753593949636, "learning_rate": 9.930706642775607e-06, "loss": 0.7478, "step": 1273 }, { "epoch": 0.08, "grad_norm": 1.5192139414357664, "learning_rate": 9.930534564533757e-06, "loss": 0.7155, "step": 1274 }, { "epoch": 0.08, "grad_norm": 1.20250491316649, "learning_rate": 9.930362274387892e-06, "loss": 0.6708, "step": 1275 }, { "epoch": 0.08, "grad_norm": 1.6340935737018734, "learning_rate": 9.930189772345416e-06, "loss": 0.8683, "step": 1276 }, { "epoch": 0.08, "grad_norm": 1.0073445910612708, "learning_rate": 9.930017058413745e-06, "loss": 0.6075, "step": 1277 }, { "epoch": 0.08, "grad_norm": 1.8300304718244806, "learning_rate": 9.929844132600299e-06, "loss": 0.7358, "step": 1278 }, { "epoch": 0.08, "grad_norm": 1.7236047710832867, "learning_rate": 9.92967099491251e-06, "loss": 0.8166, "step": 1279 }, { "epoch": 0.08, "grad_norm": 2.2300659641775553, "learning_rate": 9.929497645357822e-06, "loss": 0.8629, "step": 1280 }, { "epoch": 0.08, "grad_norm": 1.11816232283961, "learning_rate": 9.929324083943683e-06, "loss": 0.7209, "step": 1281 }, { "epoch": 0.08, "grad_norm": 1.64728478812775, "learning_rate": 9.929150310677553e-06, "loss": 0.7983, "step": 1282 }, { "epoch": 0.08, "grad_norm": 2.1506401476194954, "learning_rate": 9.9289763255669e-06, "loss": 0.8027, "step": 1283 }, { "epoch": 0.08, "grad_norm": 1.7177442938868839, "learning_rate": 9.928802128619201e-06, "loss": 0.6545, "step": 1284 }, { "epoch": 0.08, "grad_norm": 1.5716525763928897, "learning_rate": 9.928627719841945e-06, "loss": 0.7177, "step": 1285 }, { "epoch": 0.08, "grad_norm": 1.9174569446137926, "learning_rate": 9.928453099242625e-06, "loss": 0.8108, "step": 1286 }, { "epoch": 0.08, "grad_norm": 1.6159242966974348, "learning_rate": 9.928278266828747e-06, "loss": 0.7816, "step": 1287 }, { "epoch": 0.08, "grad_norm": 1.64360083142903, "learning_rate": 9.928103222607825e-06, "loss": 0.7897, "step": 1288 }, { "epoch": 0.08, "grad_norm": 1.5724315120832915, "learning_rate": 9.927927966587383e-06, "loss": 0.837, "step": 1289 }, { "epoch": 0.08, "grad_norm": 2.062961121344956, "learning_rate": 9.92775249877495e-06, "loss": 0.9035, "step": 1290 }, { "epoch": 0.08, "grad_norm": 1.6167881686472318, "learning_rate": 9.927576819178071e-06, "loss": 0.851, "step": 1291 }, { "epoch": 0.08, "grad_norm": 1.6232717114300337, "learning_rate": 9.927400927804294e-06, "loss": 0.7514, "step": 1292 }, { "epoch": 0.08, "grad_norm": 1.6395021022514675, "learning_rate": 9.927224824661179e-06, "loss": 0.7512, "step": 1293 }, { "epoch": 0.08, "grad_norm": 1.61301678929123, "learning_rate": 9.927048509756294e-06, "loss": 0.7973, "step": 1294 }, { "epoch": 0.08, "grad_norm": 1.7497618070233183, "learning_rate": 9.926871983097217e-06, "loss": 0.8282, "step": 1295 }, { "epoch": 0.08, "grad_norm": 1.718119996025925, "learning_rate": 9.926695244691536e-06, "loss": 0.8607, "step": 1296 }, { "epoch": 0.08, "grad_norm": 1.6134105627139295, "learning_rate": 9.926518294546846e-06, "loss": 0.8105, "step": 1297 }, { "epoch": 0.08, "grad_norm": 2.886181830396174, "learning_rate": 9.92634113267075e-06, "loss": 0.8302, "step": 1298 }, { "epoch": 0.08, "grad_norm": 1.6054936846713401, "learning_rate": 9.926163759070863e-06, "loss": 0.8309, "step": 1299 }, { "epoch": 0.08, "grad_norm": 1.6608516386218362, "learning_rate": 9.92598617375481e-06, "loss": 0.8541, "step": 1300 }, { "epoch": 0.08, "grad_norm": 1.313985347547988, "learning_rate": 9.925808376730225e-06, "loss": 0.6211, "step": 1301 }, { "epoch": 0.08, "grad_norm": 1.6105590675446355, "learning_rate": 9.925630368004744e-06, "loss": 0.7915, "step": 1302 }, { "epoch": 0.08, "grad_norm": 1.654134223395328, "learning_rate": 9.925452147586022e-06, "loss": 0.8326, "step": 1303 }, { "epoch": 0.08, "grad_norm": 1.834183341210744, "learning_rate": 9.925273715481713e-06, "loss": 0.7752, "step": 1304 }, { "epoch": 0.08, "grad_norm": 1.7108439768047834, "learning_rate": 9.925095071699491e-06, "loss": 0.7205, "step": 1305 }, { "epoch": 0.08, "grad_norm": 1.784084558392031, "learning_rate": 9.924916216247033e-06, "loss": 0.7145, "step": 1306 }, { "epoch": 0.08, "grad_norm": 1.6395708610717354, "learning_rate": 9.924737149132022e-06, "loss": 0.7648, "step": 1307 }, { "epoch": 0.08, "grad_norm": 1.734175442461579, "learning_rate": 9.92455787036216e-06, "loss": 0.8767, "step": 1308 }, { "epoch": 0.08, "grad_norm": 1.5022835284478722, "learning_rate": 9.924378379945145e-06, "loss": 0.7897, "step": 1309 }, { "epoch": 0.08, "grad_norm": 0.9762330683723645, "learning_rate": 9.924198677888696e-06, "loss": 0.6286, "step": 1310 }, { "epoch": 0.08, "grad_norm": 1.6490348542398754, "learning_rate": 9.924018764200538e-06, "loss": 0.8461, "step": 1311 }, { "epoch": 0.08, "grad_norm": 1.7436468616171612, "learning_rate": 9.923838638888397e-06, "loss": 0.7932, "step": 1312 }, { "epoch": 0.08, "grad_norm": 2.805857620684112, "learning_rate": 9.923658301960018e-06, "loss": 0.7757, "step": 1313 }, { "epoch": 0.08, "grad_norm": 1.7640589532786841, "learning_rate": 9.923477753423151e-06, "loss": 0.7875, "step": 1314 }, { "epoch": 0.08, "grad_norm": 1.8438329410483, "learning_rate": 9.923296993285558e-06, "loss": 0.8731, "step": 1315 }, { "epoch": 0.08, "grad_norm": 1.5697137342096394, "learning_rate": 9.923116021555003e-06, "loss": 0.7095, "step": 1316 }, { "epoch": 0.08, "grad_norm": 1.6502393639279858, "learning_rate": 9.922934838239269e-06, "loss": 0.8164, "step": 1317 }, { "epoch": 0.08, "grad_norm": 1.5276199472029603, "learning_rate": 9.922753443346137e-06, "loss": 0.7069, "step": 1318 }, { "epoch": 0.08, "grad_norm": 1.6518999824197815, "learning_rate": 9.922571836883408e-06, "loss": 0.7686, "step": 1319 }, { "epoch": 0.08, "grad_norm": 3.0065534215787495, "learning_rate": 9.922390018858885e-06, "loss": 0.783, "step": 1320 }, { "epoch": 0.08, "grad_norm": 1.637431226170952, "learning_rate": 9.922207989280383e-06, "loss": 0.6654, "step": 1321 }, { "epoch": 0.08, "grad_norm": 1.7676100841952913, "learning_rate": 9.922025748155724e-06, "loss": 0.8373, "step": 1322 }, { "epoch": 0.08, "grad_norm": 1.6663539496830173, "learning_rate": 9.92184329549274e-06, "loss": 0.7606, "step": 1323 }, { "epoch": 0.08, "grad_norm": 1.7512918188691238, "learning_rate": 9.921660631299274e-06, "loss": 0.7379, "step": 1324 }, { "epoch": 0.08, "grad_norm": 1.4948572213294324, "learning_rate": 9.921477755583178e-06, "loss": 0.7888, "step": 1325 }, { "epoch": 0.08, "grad_norm": 1.2323747361544266, "learning_rate": 9.921294668352307e-06, "loss": 0.6629, "step": 1326 }, { "epoch": 0.08, "grad_norm": 1.5872787330897817, "learning_rate": 9.921111369614533e-06, "loss": 0.7367, "step": 1327 }, { "epoch": 0.09, "grad_norm": 1.824547554066257, "learning_rate": 9.920927859377733e-06, "loss": 0.794, "step": 1328 }, { "epoch": 0.09, "grad_norm": 1.9464071471415827, "learning_rate": 9.920744137649793e-06, "loss": 0.7787, "step": 1329 }, { "epoch": 0.09, "grad_norm": 1.7418449938047518, "learning_rate": 9.92056020443861e-06, "loss": 0.8148, "step": 1330 }, { "epoch": 0.09, "grad_norm": 1.7628307130429717, "learning_rate": 9.920376059752091e-06, "loss": 0.8886, "step": 1331 }, { "epoch": 0.09, "grad_norm": 2.1298748775344722, "learning_rate": 9.920191703598145e-06, "loss": 0.8744, "step": 1332 }, { "epoch": 0.09, "grad_norm": 1.4262142084577185, "learning_rate": 9.920007135984701e-06, "loss": 0.7639, "step": 1333 }, { "epoch": 0.09, "grad_norm": 1.4747555243108577, "learning_rate": 9.919822356919689e-06, "loss": 0.6192, "step": 1334 }, { "epoch": 0.09, "grad_norm": 1.515134844450925, "learning_rate": 9.919637366411047e-06, "loss": 0.7903, "step": 1335 }, { "epoch": 0.09, "grad_norm": 1.599180433199419, "learning_rate": 9.919452164466731e-06, "loss": 0.8059, "step": 1336 }, { "epoch": 0.09, "grad_norm": 1.5417776785782038, "learning_rate": 9.919266751094698e-06, "loss": 0.7166, "step": 1337 }, { "epoch": 0.09, "grad_norm": 1.5985923931572206, "learning_rate": 9.919081126302915e-06, "loss": 0.741, "step": 1338 }, { "epoch": 0.09, "grad_norm": 2.3732350801681283, "learning_rate": 9.918895290099364e-06, "loss": 0.7878, "step": 1339 }, { "epoch": 0.09, "grad_norm": 1.8144292108654885, "learning_rate": 9.918709242492028e-06, "loss": 0.7435, "step": 1340 }, { "epoch": 0.09, "grad_norm": 1.7476907558822992, "learning_rate": 9.918522983488905e-06, "loss": 0.7669, "step": 1341 }, { "epoch": 0.09, "grad_norm": 1.6495885249079942, "learning_rate": 9.918336513097999e-06, "loss": 0.822, "step": 1342 }, { "epoch": 0.09, "grad_norm": 1.5238848686339252, "learning_rate": 9.918149831327323e-06, "loss": 0.8509, "step": 1343 }, { "epoch": 0.09, "grad_norm": 1.6578954536389297, "learning_rate": 9.917962938184904e-06, "loss": 0.7616, "step": 1344 }, { "epoch": 0.09, "grad_norm": 1.4979952529578673, "learning_rate": 9.917775833678772e-06, "loss": 0.7765, "step": 1345 }, { "epoch": 0.09, "grad_norm": 1.577701679423415, "learning_rate": 9.917588517816967e-06, "loss": 0.8165, "step": 1346 }, { "epoch": 0.09, "grad_norm": 1.9213566051876076, "learning_rate": 9.917400990607542e-06, "loss": 0.7592, "step": 1347 }, { "epoch": 0.09, "grad_norm": 1.5815168240465909, "learning_rate": 9.917213252058554e-06, "loss": 0.8347, "step": 1348 }, { "epoch": 0.09, "grad_norm": 1.9167916930907636, "learning_rate": 9.917025302178074e-06, "loss": 0.7986, "step": 1349 }, { "epoch": 0.09, "grad_norm": 1.7433315042897632, "learning_rate": 9.916837140974178e-06, "loss": 0.8048, "step": 1350 }, { "epoch": 0.09, "grad_norm": 1.5894351475259663, "learning_rate": 9.916648768454955e-06, "loss": 0.8266, "step": 1351 }, { "epoch": 0.09, "grad_norm": 1.5591295846124154, "learning_rate": 9.916460184628498e-06, "loss": 0.6923, "step": 1352 }, { "epoch": 0.09, "grad_norm": 1.8178533607551666, "learning_rate": 9.916271389502911e-06, "loss": 0.8106, "step": 1353 }, { "epoch": 0.09, "grad_norm": 1.4692032860340538, "learning_rate": 9.916082383086314e-06, "loss": 0.7671, "step": 1354 }, { "epoch": 0.09, "grad_norm": 1.7766597614419541, "learning_rate": 9.915893165386824e-06, "loss": 0.8249, "step": 1355 }, { "epoch": 0.09, "grad_norm": 1.3200922955217738, "learning_rate": 9.915703736412576e-06, "loss": 0.7363, "step": 1356 }, { "epoch": 0.09, "grad_norm": 1.8031804748361315, "learning_rate": 9.915514096171711e-06, "loss": 0.8619, "step": 1357 }, { "epoch": 0.09, "grad_norm": 1.5479365596855579, "learning_rate": 9.915324244672379e-06, "loss": 0.8032, "step": 1358 }, { "epoch": 0.09, "grad_norm": 1.6915339963906464, "learning_rate": 9.915134181922739e-06, "loss": 0.8403, "step": 1359 }, { "epoch": 0.09, "grad_norm": 1.5303456373717028, "learning_rate": 9.91494390793096e-06, "loss": 0.7504, "step": 1360 }, { "epoch": 0.09, "grad_norm": 2.1562045436268598, "learning_rate": 9.91475342270522e-06, "loss": 0.8596, "step": 1361 }, { "epoch": 0.09, "grad_norm": 2.307623084503907, "learning_rate": 9.914562726253705e-06, "loss": 0.7808, "step": 1362 }, { "epoch": 0.09, "grad_norm": 2.087259646998216, "learning_rate": 9.914371818584612e-06, "loss": 0.7689, "step": 1363 }, { "epoch": 0.09, "grad_norm": 2.0261816947731788, "learning_rate": 9.914180699706143e-06, "loss": 0.8674, "step": 1364 }, { "epoch": 0.09, "grad_norm": 1.7387421700719452, "learning_rate": 9.913989369626515e-06, "loss": 0.8459, "step": 1365 }, { "epoch": 0.09, "grad_norm": 1.4655885029752738, "learning_rate": 9.913797828353948e-06, "loss": 0.6466, "step": 1366 }, { "epoch": 0.09, "grad_norm": 1.7321981216442495, "learning_rate": 9.913606075896677e-06, "loss": 0.8357, "step": 1367 }, { "epoch": 0.09, "grad_norm": 1.7611007824021798, "learning_rate": 9.913414112262942e-06, "loss": 0.8599, "step": 1368 }, { "epoch": 0.09, "grad_norm": 2.3118587908136057, "learning_rate": 9.913221937460991e-06, "loss": 0.8276, "step": 1369 }, { "epoch": 0.09, "grad_norm": 1.819925550778839, "learning_rate": 9.913029551499087e-06, "loss": 0.7408, "step": 1370 }, { "epoch": 0.09, "grad_norm": 3.354798958813052, "learning_rate": 9.912836954385496e-06, "loss": 0.8534, "step": 1371 }, { "epoch": 0.09, "grad_norm": 1.7098363634996552, "learning_rate": 9.912644146128495e-06, "loss": 0.7628, "step": 1372 }, { "epoch": 0.09, "grad_norm": 1.8227793080153545, "learning_rate": 9.912451126736374e-06, "loss": 0.7491, "step": 1373 }, { "epoch": 0.09, "grad_norm": 1.9011304358717993, "learning_rate": 9.912257896217425e-06, "loss": 0.8641, "step": 1374 }, { "epoch": 0.09, "grad_norm": 1.157576374482274, "learning_rate": 9.912064454579953e-06, "loss": 0.5839, "step": 1375 }, { "epoch": 0.09, "grad_norm": 1.5104102621322544, "learning_rate": 9.911870801832273e-06, "loss": 0.7232, "step": 1376 }, { "epoch": 0.09, "grad_norm": 1.1941701798103137, "learning_rate": 9.911676937982706e-06, "loss": 0.6171, "step": 1377 }, { "epoch": 0.09, "grad_norm": 1.499982408474834, "learning_rate": 9.911482863039587e-06, "loss": 0.776, "step": 1378 }, { "epoch": 0.09, "grad_norm": 1.8277150883720807, "learning_rate": 9.911288577011254e-06, "loss": 0.8183, "step": 1379 }, { "epoch": 0.09, "grad_norm": 1.7544451838334114, "learning_rate": 9.911094079906059e-06, "loss": 0.8516, "step": 1380 }, { "epoch": 0.09, "grad_norm": 1.8457719043592893, "learning_rate": 9.910899371732358e-06, "loss": 0.8667, "step": 1381 }, { "epoch": 0.09, "grad_norm": 1.5877842734099092, "learning_rate": 9.910704452498523e-06, "loss": 0.7869, "step": 1382 }, { "epoch": 0.09, "grad_norm": 2.012340513648958, "learning_rate": 9.91050932221293e-06, "loss": 0.7512, "step": 1383 }, { "epoch": 0.09, "grad_norm": 1.8717682366588637, "learning_rate": 9.910313980883963e-06, "loss": 0.8251, "step": 1384 }, { "epoch": 0.09, "grad_norm": 1.5609337606505982, "learning_rate": 9.910118428520019e-06, "loss": 0.794, "step": 1385 }, { "epoch": 0.09, "grad_norm": 1.7436552386428716, "learning_rate": 9.909922665129503e-06, "loss": 0.8167, "step": 1386 }, { "epoch": 0.09, "grad_norm": 1.6778682383995644, "learning_rate": 9.909726690720829e-06, "loss": 0.7743, "step": 1387 }, { "epoch": 0.09, "grad_norm": 1.6835975098225027, "learning_rate": 9.909530505302417e-06, "loss": 0.7026, "step": 1388 }, { "epoch": 0.09, "grad_norm": 1.670154099027307, "learning_rate": 9.9093341088827e-06, "loss": 0.7819, "step": 1389 }, { "epoch": 0.09, "grad_norm": 1.6362136254526698, "learning_rate": 9.909137501470121e-06, "loss": 0.6768, "step": 1390 }, { "epoch": 0.09, "grad_norm": 1.6322097406015168, "learning_rate": 9.908940683073127e-06, "loss": 0.7409, "step": 1391 }, { "epoch": 0.09, "grad_norm": 1.8015835447202564, "learning_rate": 9.908743653700177e-06, "loss": 0.811, "step": 1392 }, { "epoch": 0.09, "grad_norm": 1.2342518183914997, "learning_rate": 9.90854641335974e-06, "loss": 0.608, "step": 1393 }, { "epoch": 0.09, "grad_norm": 3.770647763229164, "learning_rate": 9.908348962060292e-06, "loss": 0.8096, "step": 1394 }, { "epoch": 0.09, "grad_norm": 1.9850081387399146, "learning_rate": 9.908151299810319e-06, "loss": 0.8629, "step": 1395 }, { "epoch": 0.09, "grad_norm": 1.0201099773552393, "learning_rate": 9.907953426618317e-06, "loss": 0.6485, "step": 1396 }, { "epoch": 0.09, "grad_norm": 1.6823515864516037, "learning_rate": 9.90775534249279e-06, "loss": 0.8213, "step": 1397 }, { "epoch": 0.09, "grad_norm": 1.5865285090311405, "learning_rate": 9.907557047442252e-06, "loss": 0.7715, "step": 1398 }, { "epoch": 0.09, "grad_norm": 1.5544551280487129, "learning_rate": 9.907358541475223e-06, "loss": 0.6427, "step": 1399 }, { "epoch": 0.09, "grad_norm": 1.9215688101996893, "learning_rate": 9.907159824600235e-06, "loss": 0.74, "step": 1400 }, { "epoch": 0.09, "grad_norm": 1.550636687250966, "learning_rate": 9.90696089682583e-06, "loss": 0.7131, "step": 1401 }, { "epoch": 0.09, "grad_norm": 1.6765540876388993, "learning_rate": 9.906761758160556e-06, "loss": 0.7482, "step": 1402 }, { "epoch": 0.09, "grad_norm": 1.5527799073568518, "learning_rate": 9.906562408612973e-06, "loss": 0.8024, "step": 1403 }, { "epoch": 0.09, "grad_norm": 1.6151325201787572, "learning_rate": 9.90636284819165e-06, "loss": 0.7968, "step": 1404 }, { "epoch": 0.09, "grad_norm": 1.550154550587109, "learning_rate": 9.906163076905158e-06, "loss": 0.7269, "step": 1405 }, { "epoch": 0.09, "grad_norm": 1.734437826311685, "learning_rate": 9.905963094762086e-06, "loss": 0.7305, "step": 1406 }, { "epoch": 0.09, "grad_norm": 1.6002036696326096, "learning_rate": 9.90576290177103e-06, "loss": 0.6948, "step": 1407 }, { "epoch": 0.09, "grad_norm": 1.6081671062701834, "learning_rate": 9.905562497940593e-06, "loss": 0.7738, "step": 1408 }, { "epoch": 0.09, "grad_norm": 1.670338949096512, "learning_rate": 9.905361883279388e-06, "loss": 0.7635, "step": 1409 }, { "epoch": 0.09, "grad_norm": 1.7196141923479065, "learning_rate": 9.905161057796037e-06, "loss": 0.7253, "step": 1410 }, { "epoch": 0.09, "grad_norm": 2.7451583816725926, "learning_rate": 9.90496002149917e-06, "loss": 0.748, "step": 1411 }, { "epoch": 0.09, "grad_norm": 1.9577599687674048, "learning_rate": 9.90475877439743e-06, "loss": 0.6582, "step": 1412 }, { "epoch": 0.09, "grad_norm": 1.5871735199484327, "learning_rate": 9.904557316499462e-06, "loss": 0.8124, "step": 1413 }, { "epoch": 0.09, "grad_norm": 1.788992385449905, "learning_rate": 9.904355647813928e-06, "loss": 0.7796, "step": 1414 }, { "epoch": 0.09, "grad_norm": 1.7834291182835276, "learning_rate": 9.904153768349494e-06, "loss": 0.8136, "step": 1415 }, { "epoch": 0.09, "grad_norm": 2.075405339733353, "learning_rate": 9.903951678114833e-06, "loss": 0.5151, "step": 1416 }, { "epoch": 0.09, "grad_norm": 1.609933684474823, "learning_rate": 9.903749377118637e-06, "loss": 0.7517, "step": 1417 }, { "epoch": 0.09, "grad_norm": 1.540375006952657, "learning_rate": 9.903546865369596e-06, "loss": 0.7399, "step": 1418 }, { "epoch": 0.09, "grad_norm": 2.064404505394901, "learning_rate": 9.903344142876414e-06, "loss": 0.828, "step": 1419 }, { "epoch": 0.09, "grad_norm": 1.565240186500351, "learning_rate": 9.903141209647804e-06, "loss": 0.7219, "step": 1420 }, { "epoch": 0.09, "grad_norm": 1.8239909847118552, "learning_rate": 9.902938065692488e-06, "loss": 0.758, "step": 1421 }, { "epoch": 0.09, "grad_norm": 1.6991913198935147, "learning_rate": 9.902734711019196e-06, "loss": 0.7736, "step": 1422 }, { "epoch": 0.09, "grad_norm": 2.0725351894755866, "learning_rate": 9.902531145636668e-06, "loss": 0.7219, "step": 1423 }, { "epoch": 0.09, "grad_norm": 1.9073048053028345, "learning_rate": 9.902327369553655e-06, "loss": 0.7806, "step": 1424 }, { "epoch": 0.09, "grad_norm": 2.093325640555592, "learning_rate": 9.902123382778911e-06, "loss": 0.7998, "step": 1425 }, { "epoch": 0.09, "grad_norm": 1.8781195037481375, "learning_rate": 9.901919185321205e-06, "loss": 0.8785, "step": 1426 }, { "epoch": 0.09, "grad_norm": 1.7200873002296893, "learning_rate": 9.901714777189313e-06, "loss": 0.8066, "step": 1427 }, { "epoch": 0.09, "grad_norm": 2.1267408284108873, "learning_rate": 9.901510158392021e-06, "loss": 0.7862, "step": 1428 }, { "epoch": 0.09, "grad_norm": 1.8571234423333827, "learning_rate": 9.90130532893812e-06, "loss": 0.9565, "step": 1429 }, { "epoch": 0.09, "grad_norm": 1.6760883691209636, "learning_rate": 9.901100288836416e-06, "loss": 0.8079, "step": 1430 }, { "epoch": 0.09, "grad_norm": 1.8161419345166268, "learning_rate": 9.900895038095718e-06, "loss": 0.7698, "step": 1431 }, { "epoch": 0.09, "grad_norm": 1.3831643646306566, "learning_rate": 9.900689576724854e-06, "loss": 0.6984, "step": 1432 }, { "epoch": 0.09, "grad_norm": 1.6889048870572894, "learning_rate": 9.900483904732645e-06, "loss": 0.7207, "step": 1433 }, { "epoch": 0.09, "grad_norm": 1.7128975364074555, "learning_rate": 9.900278022127938e-06, "loss": 0.8061, "step": 1434 }, { "epoch": 0.09, "grad_norm": 1.7426697793998682, "learning_rate": 9.900071928919577e-06, "loss": 0.7869, "step": 1435 }, { "epoch": 0.09, "grad_norm": 1.5413676443300877, "learning_rate": 9.899865625116423e-06, "loss": 0.833, "step": 1436 }, { "epoch": 0.09, "grad_norm": 1.753636165351524, "learning_rate": 9.899659110727339e-06, "loss": 0.8124, "step": 1437 }, { "epoch": 0.09, "grad_norm": 1.619426609330919, "learning_rate": 9.8994523857612e-06, "loss": 0.8555, "step": 1438 }, { "epoch": 0.09, "grad_norm": 1.8773358153660995, "learning_rate": 9.899245450226897e-06, "loss": 0.7234, "step": 1439 }, { "epoch": 0.09, "grad_norm": 1.6142282003854733, "learning_rate": 9.899038304133318e-06, "loss": 0.8238, "step": 1440 }, { "epoch": 0.09, "grad_norm": 1.531801733245992, "learning_rate": 9.898830947489365e-06, "loss": 0.7008, "step": 1441 }, { "epoch": 0.09, "grad_norm": 11.792347531207412, "learning_rate": 9.898623380303953e-06, "loss": 0.7772, "step": 1442 }, { "epoch": 0.09, "grad_norm": 1.3742065260112635, "learning_rate": 9.898415602586e-06, "loss": 0.6471, "step": 1443 }, { "epoch": 0.09, "grad_norm": 1.7356222072235936, "learning_rate": 9.89820761434444e-06, "loss": 0.8458, "step": 1444 }, { "epoch": 0.09, "grad_norm": 2.1978830315414077, "learning_rate": 9.89799941558821e-06, "loss": 0.7856, "step": 1445 }, { "epoch": 0.09, "grad_norm": 1.7405791864110498, "learning_rate": 9.897791006326254e-06, "loss": 0.7324, "step": 1446 }, { "epoch": 0.09, "grad_norm": 1.4158087897862284, "learning_rate": 9.897582386567532e-06, "loss": 0.7745, "step": 1447 }, { "epoch": 0.09, "grad_norm": 1.5124306637408256, "learning_rate": 9.897373556321013e-06, "loss": 0.7415, "step": 1448 }, { "epoch": 0.09, "grad_norm": 1.5793041067441713, "learning_rate": 9.897164515595667e-06, "loss": 0.7612, "step": 1449 }, { "epoch": 0.09, "grad_norm": 1.647423983221878, "learning_rate": 9.896955264400483e-06, "loss": 0.8038, "step": 1450 }, { "epoch": 0.09, "grad_norm": 1.6047458873895375, "learning_rate": 9.89674580274445e-06, "loss": 0.7961, "step": 1451 }, { "epoch": 0.09, "grad_norm": 1.7418738765072677, "learning_rate": 9.896536130636571e-06, "loss": 0.7569, "step": 1452 }, { "epoch": 0.09, "grad_norm": 1.7641866098490027, "learning_rate": 9.896326248085862e-06, "loss": 0.7113, "step": 1453 }, { "epoch": 0.09, "grad_norm": 1.8502283403499429, "learning_rate": 9.896116155101335e-06, "loss": 0.8461, "step": 1454 }, { "epoch": 0.09, "grad_norm": 1.9531680165133725, "learning_rate": 9.895905851692026e-06, "loss": 0.7858, "step": 1455 }, { "epoch": 0.09, "grad_norm": 1.6199174553938938, "learning_rate": 9.89569533786697e-06, "loss": 0.8082, "step": 1456 }, { "epoch": 0.09, "grad_norm": 1.5558875215928973, "learning_rate": 9.895484613635217e-06, "loss": 0.8071, "step": 1457 }, { "epoch": 0.09, "grad_norm": 1.8156056631024533, "learning_rate": 9.895273679005822e-06, "loss": 0.8656, "step": 1458 }, { "epoch": 0.09, "grad_norm": 1.5314211998787375, "learning_rate": 9.89506253398785e-06, "loss": 0.879, "step": 1459 }, { "epoch": 0.09, "grad_norm": 2.001788039554895, "learning_rate": 9.894851178590377e-06, "loss": 0.8061, "step": 1460 }, { "epoch": 0.09, "grad_norm": 1.0945811262045544, "learning_rate": 9.894639612822486e-06, "loss": 0.6452, "step": 1461 }, { "epoch": 0.09, "grad_norm": 1.6916343021074909, "learning_rate": 9.894427836693267e-06, "loss": 0.834, "step": 1462 }, { "epoch": 0.09, "grad_norm": 1.6808559635656706, "learning_rate": 9.894215850211825e-06, "loss": 0.7623, "step": 1463 }, { "epoch": 0.09, "grad_norm": 1.8074113366567173, "learning_rate": 9.894003653387272e-06, "loss": 0.8118, "step": 1464 }, { "epoch": 0.09, "grad_norm": 1.391376047749255, "learning_rate": 9.893791246228726e-06, "loss": 0.6266, "step": 1465 }, { "epoch": 0.09, "grad_norm": 1.5098321200616984, "learning_rate": 9.893578628745312e-06, "loss": 0.7989, "step": 1466 }, { "epoch": 0.09, "grad_norm": 1.7495965534136912, "learning_rate": 9.893365800946176e-06, "loss": 0.8873, "step": 1467 }, { "epoch": 0.09, "grad_norm": 1.6858434465649923, "learning_rate": 9.893152762840457e-06, "loss": 0.7976, "step": 1468 }, { "epoch": 0.09, "grad_norm": 1.5138627300456566, "learning_rate": 9.892939514437314e-06, "loss": 0.7975, "step": 1469 }, { "epoch": 0.09, "grad_norm": 1.4799671541157002, "learning_rate": 9.892726055745914e-06, "loss": 0.7238, "step": 1470 }, { "epoch": 0.09, "grad_norm": 1.874249026724328, "learning_rate": 9.892512386775429e-06, "loss": 0.8059, "step": 1471 }, { "epoch": 0.09, "grad_norm": 1.7559965243787359, "learning_rate": 9.89229850753504e-06, "loss": 0.7607, "step": 1472 }, { "epoch": 0.09, "grad_norm": 1.8554899750940683, "learning_rate": 9.892084418033942e-06, "loss": 0.7333, "step": 1473 }, { "epoch": 0.09, "grad_norm": 1.1324577942859546, "learning_rate": 9.891870118281336e-06, "loss": 0.6369, "step": 1474 }, { "epoch": 0.09, "grad_norm": 1.6457391769289247, "learning_rate": 9.891655608286432e-06, "loss": 0.6484, "step": 1475 }, { "epoch": 0.09, "grad_norm": 1.7063195369610686, "learning_rate": 9.891440888058449e-06, "loss": 0.7743, "step": 1476 }, { "epoch": 0.09, "grad_norm": 2.150935292573339, "learning_rate": 9.891225957606613e-06, "loss": 0.8616, "step": 1477 }, { "epoch": 0.09, "grad_norm": 1.8097387400253737, "learning_rate": 9.891010816940165e-06, "loss": 0.9244, "step": 1478 }, { "epoch": 0.09, "grad_norm": 1.729939321335769, "learning_rate": 9.890795466068351e-06, "loss": 0.7732, "step": 1479 }, { "epoch": 0.09, "grad_norm": 1.071708769979596, "learning_rate": 9.890579905000422e-06, "loss": 0.6231, "step": 1480 }, { "epoch": 0.09, "grad_norm": 1.559468948026497, "learning_rate": 9.890364133745646e-06, "loss": 0.7431, "step": 1481 }, { "epoch": 0.09, "grad_norm": 1.5976391677254405, "learning_rate": 9.890148152313295e-06, "loss": 0.7598, "step": 1482 }, { "epoch": 0.09, "grad_norm": 1.4214759226453293, "learning_rate": 9.889931960712653e-06, "loss": 0.6736, "step": 1483 }, { "epoch": 0.09, "grad_norm": 2.332542933259744, "learning_rate": 9.88971555895301e-06, "loss": 0.7205, "step": 1484 }, { "epoch": 0.1, "grad_norm": 1.489541499828104, "learning_rate": 9.88949894704367e-06, "loss": 0.6896, "step": 1485 }, { "epoch": 0.1, "grad_norm": 1.7767431137338803, "learning_rate": 9.889282124993936e-06, "loss": 0.8232, "step": 1486 }, { "epoch": 0.1, "grad_norm": 1.391185830651177, "learning_rate": 9.889065092813131e-06, "loss": 0.7747, "step": 1487 }, { "epoch": 0.1, "grad_norm": 1.6535781214177077, "learning_rate": 9.888847850510581e-06, "loss": 0.8408, "step": 1488 }, { "epoch": 0.1, "grad_norm": 1.915480348897025, "learning_rate": 9.888630398095623e-06, "loss": 0.7672, "step": 1489 }, { "epoch": 0.1, "grad_norm": 1.6949485066377055, "learning_rate": 9.888412735577604e-06, "loss": 0.7894, "step": 1490 }, { "epoch": 0.1, "grad_norm": 1.736704476323435, "learning_rate": 9.888194862965877e-06, "loss": 0.789, "step": 1491 }, { "epoch": 0.1, "grad_norm": 1.7339157539089247, "learning_rate": 9.887976780269807e-06, "loss": 0.7444, "step": 1492 }, { "epoch": 0.1, "grad_norm": 1.680270546972446, "learning_rate": 9.887758487498765e-06, "loss": 0.8172, "step": 1493 }, { "epoch": 0.1, "grad_norm": 1.6952459889944558, "learning_rate": 9.887539984662135e-06, "loss": 0.7598, "step": 1494 }, { "epoch": 0.1, "grad_norm": 1.8591899738599524, "learning_rate": 9.887321271769305e-06, "loss": 0.7503, "step": 1495 }, { "epoch": 0.1, "grad_norm": 1.959950195853302, "learning_rate": 9.887102348829678e-06, "loss": 0.7936, "step": 1496 }, { "epoch": 0.1, "grad_norm": 1.7999517430361605, "learning_rate": 9.88688321585266e-06, "loss": 0.8164, "step": 1497 }, { "epoch": 0.1, "grad_norm": 2.8053338632417613, "learning_rate": 9.886663872847672e-06, "loss": 0.8489, "step": 1498 }, { "epoch": 0.1, "grad_norm": 1.7103267905339752, "learning_rate": 9.886444319824138e-06, "loss": 0.7149, "step": 1499 }, { "epoch": 0.1, "grad_norm": 1.8015664506757632, "learning_rate": 9.886224556791495e-06, "loss": 0.8344, "step": 1500 }, { "epoch": 0.1, "grad_norm": 1.8703328710973166, "learning_rate": 9.886004583759187e-06, "loss": 0.8316, "step": 1501 }, { "epoch": 0.1, "grad_norm": 1.7416981735056, "learning_rate": 9.885784400736672e-06, "loss": 0.7689, "step": 1502 }, { "epoch": 0.1, "grad_norm": 1.097235845165237, "learning_rate": 9.885564007733406e-06, "loss": 0.641, "step": 1503 }, { "epoch": 0.1, "grad_norm": 1.1703366159725115, "learning_rate": 9.885343404758867e-06, "loss": 0.6653, "step": 1504 }, { "epoch": 0.1, "grad_norm": 1.495885085676948, "learning_rate": 9.885122591822533e-06, "loss": 0.7136, "step": 1505 }, { "epoch": 0.1, "grad_norm": 1.5482649750457103, "learning_rate": 9.884901568933896e-06, "loss": 0.7779, "step": 1506 }, { "epoch": 0.1, "grad_norm": 1.7361396004231884, "learning_rate": 9.884680336102452e-06, "loss": 0.803, "step": 1507 }, { "epoch": 0.1, "grad_norm": 1.798627781228464, "learning_rate": 9.884458893337714e-06, "loss": 0.8077, "step": 1508 }, { "epoch": 0.1, "grad_norm": 1.6663016612420396, "learning_rate": 9.884237240649195e-06, "loss": 0.7544, "step": 1509 }, { "epoch": 0.1, "grad_norm": 1.6707527438008307, "learning_rate": 9.884015378046424e-06, "loss": 0.749, "step": 1510 }, { "epoch": 0.1, "grad_norm": 1.9454644862229202, "learning_rate": 9.883793305538934e-06, "loss": 0.8331, "step": 1511 }, { "epoch": 0.1, "grad_norm": 1.8871195673019843, "learning_rate": 9.88357102313627e-06, "loss": 0.8336, "step": 1512 }, { "epoch": 0.1, "grad_norm": 1.0743676290441466, "learning_rate": 9.883348530847985e-06, "loss": 0.5996, "step": 1513 }, { "epoch": 0.1, "grad_norm": 1.1854914415317104, "learning_rate": 9.88312582868364e-06, "loss": 0.6254, "step": 1514 }, { "epoch": 0.1, "grad_norm": 1.7092493910299968, "learning_rate": 9.88290291665281e-06, "loss": 0.7139, "step": 1515 }, { "epoch": 0.1, "grad_norm": 1.6159694412466112, "learning_rate": 9.882679794765072e-06, "loss": 0.7863, "step": 1516 }, { "epoch": 0.1, "grad_norm": 1.5547906300004923, "learning_rate": 9.882456463030017e-06, "loss": 0.859, "step": 1517 }, { "epoch": 0.1, "grad_norm": 1.5219606141159285, "learning_rate": 9.882232921457242e-06, "loss": 0.7419, "step": 1518 }, { "epoch": 0.1, "grad_norm": 1.562185171401622, "learning_rate": 9.882009170056354e-06, "loss": 0.7419, "step": 1519 }, { "epoch": 0.1, "grad_norm": 1.6591903500305096, "learning_rate": 9.881785208836974e-06, "loss": 0.7113, "step": 1520 }, { "epoch": 0.1, "grad_norm": 1.6298345037865085, "learning_rate": 9.88156103780872e-06, "loss": 0.7797, "step": 1521 }, { "epoch": 0.1, "grad_norm": 2.248549932600784, "learning_rate": 9.881336656981232e-06, "loss": 0.7993, "step": 1522 }, { "epoch": 0.1, "grad_norm": 1.8077553860746163, "learning_rate": 9.881112066364151e-06, "loss": 0.7696, "step": 1523 }, { "epoch": 0.1, "grad_norm": 1.6063110328360517, "learning_rate": 9.88088726596713e-06, "loss": 0.7694, "step": 1524 }, { "epoch": 0.1, "grad_norm": 1.621270671207652, "learning_rate": 9.880662255799831e-06, "loss": 0.7126, "step": 1525 }, { "epoch": 0.1, "grad_norm": 1.5691802145809577, "learning_rate": 9.880437035871924e-06, "loss": 0.7629, "step": 1526 }, { "epoch": 0.1, "grad_norm": 1.2959875904448017, "learning_rate": 9.880211606193089e-06, "loss": 0.6758, "step": 1527 }, { "epoch": 0.1, "grad_norm": 2.8213850835969603, "learning_rate": 9.879985966773012e-06, "loss": 0.7163, "step": 1528 }, { "epoch": 0.1, "grad_norm": 1.7208754465863025, "learning_rate": 9.879760117621393e-06, "loss": 0.7591, "step": 1529 }, { "epoch": 0.1, "grad_norm": 1.986018109691483, "learning_rate": 9.879534058747939e-06, "loss": 0.8896, "step": 1530 }, { "epoch": 0.1, "grad_norm": 1.5430117963696894, "learning_rate": 9.879307790162364e-06, "loss": 0.8084, "step": 1531 }, { "epoch": 0.1, "grad_norm": 1.5409182660541272, "learning_rate": 9.879081311874392e-06, "loss": 0.7444, "step": 1532 }, { "epoch": 0.1, "grad_norm": 1.7136270973239567, "learning_rate": 9.87885462389376e-06, "loss": 0.8492, "step": 1533 }, { "epoch": 0.1, "grad_norm": 1.6845003826036034, "learning_rate": 9.878627726230206e-06, "loss": 0.7587, "step": 1534 }, { "epoch": 0.1, "grad_norm": 1.8588677171818204, "learning_rate": 9.878400618893483e-06, "loss": 0.7296, "step": 1535 }, { "epoch": 0.1, "grad_norm": 1.62678944180228, "learning_rate": 9.878173301893355e-06, "loss": 0.7129, "step": 1536 }, { "epoch": 0.1, "grad_norm": 1.7542267793117643, "learning_rate": 9.877945775239585e-06, "loss": 0.7423, "step": 1537 }, { "epoch": 0.1, "grad_norm": 1.3214462714748687, "learning_rate": 9.87771803894196e-06, "loss": 0.6724, "step": 1538 }, { "epoch": 0.1, "grad_norm": 1.8640047654909846, "learning_rate": 9.87749009301026e-06, "loss": 0.7746, "step": 1539 }, { "epoch": 0.1, "grad_norm": 1.711997478915398, "learning_rate": 9.877261937454286e-06, "loss": 0.8077, "step": 1540 }, { "epoch": 0.1, "grad_norm": 2.4675907533303305, "learning_rate": 9.877033572283842e-06, "loss": 0.6858, "step": 1541 }, { "epoch": 0.1, "grad_norm": 1.758150421927609, "learning_rate": 9.876804997508744e-06, "loss": 0.9089, "step": 1542 }, { "epoch": 0.1, "grad_norm": 1.4970815924124992, "learning_rate": 9.876576213138815e-06, "loss": 0.7432, "step": 1543 }, { "epoch": 0.1, "grad_norm": 3.3550931642335975, "learning_rate": 9.876347219183888e-06, "loss": 0.8207, "step": 1544 }, { "epoch": 0.1, "grad_norm": 2.064557709422997, "learning_rate": 9.876118015653804e-06, "loss": 0.7593, "step": 1545 }, { "epoch": 0.1, "grad_norm": 1.8901971694206634, "learning_rate": 9.87588860255841e-06, "loss": 0.8154, "step": 1546 }, { "epoch": 0.1, "grad_norm": 1.916849257658654, "learning_rate": 9.875658979907574e-06, "loss": 0.7752, "step": 1547 }, { "epoch": 0.1, "grad_norm": 1.5002940596664343, "learning_rate": 9.875429147711158e-06, "loss": 0.7701, "step": 1548 }, { "epoch": 0.1, "grad_norm": 1.710311936013766, "learning_rate": 9.875199105979043e-06, "loss": 0.8933, "step": 1549 }, { "epoch": 0.1, "grad_norm": 1.7349922498353438, "learning_rate": 9.874968854721115e-06, "loss": 0.7868, "step": 1550 }, { "epoch": 0.1, "grad_norm": 1.6391352904000596, "learning_rate": 9.874738393947268e-06, "loss": 0.835, "step": 1551 }, { "epoch": 0.1, "grad_norm": 1.3441859764446644, "learning_rate": 9.874507723667409e-06, "loss": 0.7988, "step": 1552 }, { "epoch": 0.1, "grad_norm": 1.979201357213965, "learning_rate": 9.87427684389145e-06, "loss": 0.72, "step": 1553 }, { "epoch": 0.1, "grad_norm": 2.0040240555586717, "learning_rate": 9.874045754629314e-06, "loss": 0.8537, "step": 1554 }, { "epoch": 0.1, "grad_norm": 1.967938542268629, "learning_rate": 9.873814455890934e-06, "loss": 0.7112, "step": 1555 }, { "epoch": 0.1, "grad_norm": 1.0917612102271113, "learning_rate": 9.87358294768625e-06, "loss": 0.5674, "step": 1556 }, { "epoch": 0.1, "grad_norm": 1.4013625958716303, "learning_rate": 9.873351230025212e-06, "loss": 0.6345, "step": 1557 }, { "epoch": 0.1, "grad_norm": 1.1175642725210506, "learning_rate": 9.873119302917778e-06, "loss": 0.6883, "step": 1558 }, { "epoch": 0.1, "grad_norm": 1.615477604419287, "learning_rate": 9.872887166373916e-06, "loss": 0.7833, "step": 1559 }, { "epoch": 0.1, "grad_norm": 1.5318656227418666, "learning_rate": 9.872654820403604e-06, "loss": 0.7003, "step": 1560 }, { "epoch": 0.1, "grad_norm": 1.2246709584346156, "learning_rate": 9.872422265016827e-06, "loss": 0.7181, "step": 1561 }, { "epoch": 0.1, "grad_norm": 1.8259889861986818, "learning_rate": 9.872189500223577e-06, "loss": 0.7826, "step": 1562 }, { "epoch": 0.1, "grad_norm": 1.6185799517091026, "learning_rate": 9.871956526033863e-06, "loss": 0.7682, "step": 1563 }, { "epoch": 0.1, "grad_norm": 1.6708706775861175, "learning_rate": 9.871723342457693e-06, "loss": 0.8231, "step": 1564 }, { "epoch": 0.1, "grad_norm": 1.6269107697615546, "learning_rate": 9.871489949505091e-06, "loss": 0.8208, "step": 1565 }, { "epoch": 0.1, "grad_norm": 1.8280244173664344, "learning_rate": 9.871256347186087e-06, "loss": 0.7886, "step": 1566 }, { "epoch": 0.1, "grad_norm": 1.1964196128482611, "learning_rate": 9.871022535510722e-06, "loss": 0.7226, "step": 1567 }, { "epoch": 0.1, "grad_norm": 1.5430872835297225, "learning_rate": 9.870788514489045e-06, "loss": 0.7021, "step": 1568 }, { "epoch": 0.1, "grad_norm": 2.098874590207589, "learning_rate": 9.870554284131111e-06, "loss": 0.7965, "step": 1569 }, { "epoch": 0.1, "grad_norm": 1.6549060812982925, "learning_rate": 9.870319844446987e-06, "loss": 0.7454, "step": 1570 }, { "epoch": 0.1, "grad_norm": 1.7671246678347914, "learning_rate": 9.870085195446752e-06, "loss": 0.7965, "step": 1571 }, { "epoch": 0.1, "grad_norm": 1.365220577247846, "learning_rate": 9.869850337140489e-06, "loss": 0.6895, "step": 1572 }, { "epoch": 0.1, "grad_norm": 1.7032001060222624, "learning_rate": 9.86961526953829e-06, "loss": 0.763, "step": 1573 }, { "epoch": 0.1, "grad_norm": 1.8048656936090846, "learning_rate": 9.86937999265026e-06, "loss": 0.8266, "step": 1574 }, { "epoch": 0.1, "grad_norm": 1.7343023891340075, "learning_rate": 9.86914450648651e-06, "loss": 0.7696, "step": 1575 }, { "epoch": 0.1, "grad_norm": 2.862371175219624, "learning_rate": 9.86890881105716e-06, "loss": 0.6964, "step": 1576 }, { "epoch": 0.1, "grad_norm": 1.6675153747145874, "learning_rate": 9.868672906372341e-06, "loss": 0.6863, "step": 1577 }, { "epoch": 0.1, "grad_norm": 1.4084484280380045, "learning_rate": 9.868436792442191e-06, "loss": 0.7476, "step": 1578 }, { "epoch": 0.1, "grad_norm": 1.4923726450513668, "learning_rate": 9.868200469276858e-06, "loss": 0.7622, "step": 1579 }, { "epoch": 0.1, "grad_norm": 1.565897660702118, "learning_rate": 9.867963936886497e-06, "loss": 0.7887, "step": 1580 }, { "epoch": 0.1, "grad_norm": 1.6464259836274526, "learning_rate": 9.867727195281275e-06, "loss": 0.8307, "step": 1581 }, { "epoch": 0.1, "grad_norm": 1.5655675146175585, "learning_rate": 9.867490244471367e-06, "loss": 0.8764, "step": 1582 }, { "epoch": 0.1, "grad_norm": 1.572506431452719, "learning_rate": 9.867253084466957e-06, "loss": 0.7325, "step": 1583 }, { "epoch": 0.1, "grad_norm": 1.6340708341566534, "learning_rate": 9.867015715278236e-06, "loss": 0.7607, "step": 1584 }, { "epoch": 0.1, "grad_norm": 1.5941201061905297, "learning_rate": 9.866778136915408e-06, "loss": 0.6582, "step": 1585 }, { "epoch": 0.1, "grad_norm": 2.0052306438977934, "learning_rate": 9.86654034938868e-06, "loss": 0.6726, "step": 1586 }, { "epoch": 0.1, "grad_norm": 2.010144435802643, "learning_rate": 9.866302352708276e-06, "loss": 0.745, "step": 1587 }, { "epoch": 0.1, "grad_norm": 1.6097975136376215, "learning_rate": 9.866064146884422e-06, "loss": 0.7736, "step": 1588 }, { "epoch": 0.1, "grad_norm": 3.006243291704856, "learning_rate": 9.865825731927356e-06, "loss": 0.735, "step": 1589 }, { "epoch": 0.1, "grad_norm": 1.4116744939103925, "learning_rate": 9.865587107847323e-06, "loss": 0.7114, "step": 1590 }, { "epoch": 0.1, "grad_norm": 1.2037575043073243, "learning_rate": 9.865348274654582e-06, "loss": 0.6516, "step": 1591 }, { "epoch": 0.1, "grad_norm": 13.310449320080052, "learning_rate": 9.865109232359394e-06, "loss": 0.7257, "step": 1592 }, { "epoch": 0.1, "grad_norm": 1.627351824408714, "learning_rate": 9.864869980972036e-06, "loss": 0.7685, "step": 1593 }, { "epoch": 0.1, "grad_norm": 2.011581453222143, "learning_rate": 9.864630520502787e-06, "loss": 0.8143, "step": 1594 }, { "epoch": 0.1, "grad_norm": 1.6070113526939545, "learning_rate": 9.86439085096194e-06, "loss": 0.8423, "step": 1595 }, { "epoch": 0.1, "grad_norm": 1.796801032343311, "learning_rate": 9.864150972359798e-06, "loss": 0.7605, "step": 1596 }, { "epoch": 0.1, "grad_norm": 2.051274394909971, "learning_rate": 9.863910884706667e-06, "loss": 0.7122, "step": 1597 }, { "epoch": 0.1, "grad_norm": 1.7163339530587947, "learning_rate": 9.863670588012865e-06, "loss": 0.7893, "step": 1598 }, { "epoch": 0.1, "grad_norm": 1.7813726816427264, "learning_rate": 9.863430082288724e-06, "loss": 0.7679, "step": 1599 }, { "epoch": 0.1, "grad_norm": 1.1358489370266969, "learning_rate": 9.863189367544576e-06, "loss": 0.7401, "step": 1600 }, { "epoch": 0.1, "grad_norm": 1.6923065150962497, "learning_rate": 9.862948443790768e-06, "loss": 0.7399, "step": 1601 }, { "epoch": 0.1, "grad_norm": 1.7917118341471634, "learning_rate": 9.862707311037654e-06, "loss": 0.721, "step": 1602 }, { "epoch": 0.1, "grad_norm": 1.592017896363788, "learning_rate": 9.862465969295599e-06, "loss": 0.8347, "step": 1603 }, { "epoch": 0.1, "grad_norm": 1.720606889108258, "learning_rate": 9.862224418574972e-06, "loss": 0.797, "step": 1604 }, { "epoch": 0.1, "grad_norm": 2.306117826680241, "learning_rate": 9.861982658886158e-06, "loss": 0.7904, "step": 1605 }, { "epoch": 0.1, "grad_norm": 1.549292212077288, "learning_rate": 9.861740690239546e-06, "loss": 0.735, "step": 1606 }, { "epoch": 0.1, "grad_norm": 1.7351473370999078, "learning_rate": 9.861498512645532e-06, "loss": 0.8926, "step": 1607 }, { "epoch": 0.1, "grad_norm": 1.0963941419259087, "learning_rate": 9.861256126114532e-06, "loss": 0.6735, "step": 1608 }, { "epoch": 0.1, "grad_norm": 1.4408499708187965, "learning_rate": 9.861013530656956e-06, "loss": 0.7297, "step": 1609 }, { "epoch": 0.1, "grad_norm": 1.5232192445452104, "learning_rate": 9.860770726283234e-06, "loss": 0.7924, "step": 1610 }, { "epoch": 0.1, "grad_norm": 1.2734062941203683, "learning_rate": 9.860527713003797e-06, "loss": 0.7235, "step": 1611 }, { "epoch": 0.1, "grad_norm": 1.732490498221523, "learning_rate": 9.860284490829097e-06, "loss": 0.8298, "step": 1612 }, { "epoch": 0.1, "grad_norm": 1.3546096428737875, "learning_rate": 9.86004105976958e-06, "loss": 0.6784, "step": 1613 }, { "epoch": 0.1, "grad_norm": 1.660871609415446, "learning_rate": 9.85979741983571e-06, "loss": 0.8019, "step": 1614 }, { "epoch": 0.1, "grad_norm": 1.7109473097724799, "learning_rate": 9.85955357103796e-06, "loss": 0.8436, "step": 1615 }, { "epoch": 0.1, "grad_norm": 1.5279478898236976, "learning_rate": 9.859309513386808e-06, "loss": 0.7251, "step": 1616 }, { "epoch": 0.1, "grad_norm": 1.5335198171391058, "learning_rate": 9.859065246892744e-06, "loss": 0.7093, "step": 1617 }, { "epoch": 0.1, "grad_norm": 1.7032055195996263, "learning_rate": 9.858820771566267e-06, "loss": 0.7094, "step": 1618 }, { "epoch": 0.1, "grad_norm": 1.586655924627059, "learning_rate": 9.858576087417881e-06, "loss": 0.8408, "step": 1619 }, { "epoch": 0.1, "grad_norm": 1.8710216970089615, "learning_rate": 9.858331194458105e-06, "loss": 0.7258, "step": 1620 }, { "epoch": 0.1, "grad_norm": 1.4946824853287048, "learning_rate": 9.858086092697464e-06, "loss": 0.7486, "step": 1621 }, { "epoch": 0.1, "grad_norm": 1.5309461848891603, "learning_rate": 9.85784078214649e-06, "loss": 0.778, "step": 1622 }, { "epoch": 0.1, "grad_norm": 1.2120153624096741, "learning_rate": 9.857595262815726e-06, "loss": 0.7423, "step": 1623 }, { "epoch": 0.1, "grad_norm": 1.603797910821844, "learning_rate": 9.857349534715728e-06, "loss": 0.7016, "step": 1624 }, { "epoch": 0.1, "grad_norm": 1.5011190452913514, "learning_rate": 9.857103597857051e-06, "loss": 0.7602, "step": 1625 }, { "epoch": 0.1, "grad_norm": 1.8754309965198876, "learning_rate": 9.856857452250268e-06, "loss": 0.8069, "step": 1626 }, { "epoch": 0.1, "grad_norm": 1.5474373794151346, "learning_rate": 9.856611097905956e-06, "loss": 0.7051, "step": 1627 }, { "epoch": 0.1, "grad_norm": 1.801120688056156, "learning_rate": 9.856364534834708e-06, "loss": 0.7239, "step": 1628 }, { "epoch": 0.1, "grad_norm": 1.6217803786011353, "learning_rate": 9.856117763047115e-06, "loss": 0.7913, "step": 1629 }, { "epoch": 0.1, "grad_norm": 1.6797674002289795, "learning_rate": 9.855870782553783e-06, "loss": 0.7665, "step": 1630 }, { "epoch": 0.1, "grad_norm": 1.5618502028659706, "learning_rate": 9.855623593365332e-06, "loss": 0.8247, "step": 1631 }, { "epoch": 0.1, "grad_norm": 2.2276079976337195, "learning_rate": 9.855376195492378e-06, "loss": 0.7738, "step": 1632 }, { "epoch": 0.1, "grad_norm": 1.4681121957816115, "learning_rate": 9.855128588945559e-06, "loss": 0.6489, "step": 1633 }, { "epoch": 0.1, "grad_norm": 1.6077430216723312, "learning_rate": 9.854880773735515e-06, "loss": 0.7899, "step": 1634 }, { "epoch": 0.1, "grad_norm": 1.5920219066287458, "learning_rate": 9.8546327498729e-06, "loss": 0.8032, "step": 1635 }, { "epoch": 0.1, "grad_norm": 1.6183710295995037, "learning_rate": 9.854384517368368e-06, "loss": 0.8087, "step": 1636 }, { "epoch": 0.1, "grad_norm": 1.5725065202361792, "learning_rate": 9.854136076232587e-06, "loss": 0.8039, "step": 1637 }, { "epoch": 0.1, "grad_norm": 1.607885190810362, "learning_rate": 9.85388742647624e-06, "loss": 0.8645, "step": 1638 }, { "epoch": 0.1, "grad_norm": 1.585155755025544, "learning_rate": 9.853638568110013e-06, "loss": 0.8219, "step": 1639 }, { "epoch": 0.1, "grad_norm": 1.6001274584391068, "learning_rate": 9.853389501144596e-06, "loss": 0.8167, "step": 1640 }, { "epoch": 0.11, "grad_norm": 1.6778799274899099, "learning_rate": 9.853140225590698e-06, "loss": 0.7921, "step": 1641 }, { "epoch": 0.11, "grad_norm": 1.6177162834403636, "learning_rate": 9.852890741459031e-06, "loss": 0.7409, "step": 1642 }, { "epoch": 0.11, "grad_norm": 1.510201583207035, "learning_rate": 9.852641048760319e-06, "loss": 0.6831, "step": 1643 }, { "epoch": 0.11, "grad_norm": 2.154104330656575, "learning_rate": 9.85239114750529e-06, "loss": 0.7933, "step": 1644 }, { "epoch": 0.11, "grad_norm": 1.5341266403926754, "learning_rate": 9.852141037704683e-06, "loss": 0.7187, "step": 1645 }, { "epoch": 0.11, "grad_norm": 1.7557739831335735, "learning_rate": 9.851890719369253e-06, "loss": 0.758, "step": 1646 }, { "epoch": 0.11, "grad_norm": 1.6172457754217804, "learning_rate": 9.851640192509755e-06, "loss": 0.717, "step": 1647 }, { "epoch": 0.11, "grad_norm": 1.5699074757566802, "learning_rate": 9.851389457136955e-06, "loss": 0.6957, "step": 1648 }, { "epoch": 0.11, "grad_norm": 1.5879878893502493, "learning_rate": 9.85113851326163e-06, "loss": 0.749, "step": 1649 }, { "epoch": 0.11, "grad_norm": 1.2456801495409076, "learning_rate": 9.850887360894567e-06, "loss": 0.7745, "step": 1650 }, { "epoch": 0.11, "grad_norm": 1.8442956300177777, "learning_rate": 9.850636000046558e-06, "loss": 0.8974, "step": 1651 }, { "epoch": 0.11, "grad_norm": 1.78380298212958, "learning_rate": 9.850384430728406e-06, "loss": 0.6942, "step": 1652 }, { "epoch": 0.11, "grad_norm": 1.8850804439195172, "learning_rate": 9.850132652950923e-06, "loss": 0.7982, "step": 1653 }, { "epoch": 0.11, "grad_norm": 1.7131962657412547, "learning_rate": 9.849880666724931e-06, "loss": 0.7497, "step": 1654 }, { "epoch": 0.11, "grad_norm": 1.5993506335220022, "learning_rate": 9.84962847206126e-06, "loss": 0.6879, "step": 1655 }, { "epoch": 0.11, "grad_norm": 1.5274657877645899, "learning_rate": 9.849376068970744e-06, "loss": 0.7496, "step": 1656 }, { "epoch": 0.11, "grad_norm": 1.4622145843088514, "learning_rate": 9.849123457464238e-06, "loss": 0.6785, "step": 1657 }, { "epoch": 0.11, "grad_norm": 1.1837015401614532, "learning_rate": 9.848870637552592e-06, "loss": 0.6587, "step": 1658 }, { "epoch": 0.11, "grad_norm": 1.659833800446065, "learning_rate": 9.848617609246678e-06, "loss": 0.852, "step": 1659 }, { "epoch": 0.11, "grad_norm": 1.6296915898285655, "learning_rate": 9.848364372557366e-06, "loss": 0.7099, "step": 1660 }, { "epoch": 0.11, "grad_norm": 1.741717718137808, "learning_rate": 9.84811092749554e-06, "loss": 0.8416, "step": 1661 }, { "epoch": 0.11, "grad_norm": 1.6925958744250842, "learning_rate": 9.847857274072096e-06, "loss": 0.7975, "step": 1662 }, { "epoch": 0.11, "grad_norm": 1.1269913083872711, "learning_rate": 9.84760341229793e-06, "loss": 0.7203, "step": 1663 }, { "epoch": 0.11, "grad_norm": 1.6364416850548749, "learning_rate": 9.847349342183959e-06, "loss": 0.7826, "step": 1664 }, { "epoch": 0.11, "grad_norm": 1.4720233071764202, "learning_rate": 9.847095063741097e-06, "loss": 0.7556, "step": 1665 }, { "epoch": 0.11, "grad_norm": 1.6530720314649328, "learning_rate": 9.846840576980275e-06, "loss": 0.7834, "step": 1666 }, { "epoch": 0.11, "grad_norm": 1.9514580563135624, "learning_rate": 9.84658588191243e-06, "loss": 0.7115, "step": 1667 }, { "epoch": 0.11, "grad_norm": 1.7519892977143776, "learning_rate": 9.846330978548507e-06, "loss": 0.7506, "step": 1668 }, { "epoch": 0.11, "grad_norm": 2.0000585341407024, "learning_rate": 9.846075866899463e-06, "loss": 0.8213, "step": 1669 }, { "epoch": 0.11, "grad_norm": 1.6290262391677972, "learning_rate": 9.845820546976258e-06, "loss": 0.7108, "step": 1670 }, { "epoch": 0.11, "grad_norm": 3.250244532561895, "learning_rate": 9.845565018789873e-06, "loss": 0.8444, "step": 1671 }, { "epoch": 0.11, "grad_norm": 1.6408553764723313, "learning_rate": 9.845309282351282e-06, "loss": 0.7353, "step": 1672 }, { "epoch": 0.11, "grad_norm": 1.626114045095167, "learning_rate": 9.845053337671482e-06, "loss": 0.7059, "step": 1673 }, { "epoch": 0.11, "grad_norm": 1.4070710764341519, "learning_rate": 9.84479718476147e-06, "loss": 0.7933, "step": 1674 }, { "epoch": 0.11, "grad_norm": 1.5908478486115358, "learning_rate": 9.844540823632254e-06, "loss": 0.7848, "step": 1675 }, { "epoch": 0.11, "grad_norm": 1.6859856447868682, "learning_rate": 9.844284254294854e-06, "loss": 0.8366, "step": 1676 }, { "epoch": 0.11, "grad_norm": 1.8926001619952113, "learning_rate": 9.844027476760295e-06, "loss": 0.805, "step": 1677 }, { "epoch": 0.11, "grad_norm": 1.9328136806404623, "learning_rate": 9.843770491039614e-06, "loss": 0.7416, "step": 1678 }, { "epoch": 0.11, "grad_norm": 1.7304407104563966, "learning_rate": 9.843513297143856e-06, "loss": 0.8179, "step": 1679 }, { "epoch": 0.11, "grad_norm": 1.6544078999280845, "learning_rate": 9.843255895084076e-06, "loss": 0.791, "step": 1680 }, { "epoch": 0.11, "grad_norm": 1.5087904395484522, "learning_rate": 9.842998284871332e-06, "loss": 0.7164, "step": 1681 }, { "epoch": 0.11, "grad_norm": 1.5690600555484102, "learning_rate": 9.842740466516698e-06, "loss": 0.7823, "step": 1682 }, { "epoch": 0.11, "grad_norm": 1.6877231009656433, "learning_rate": 9.842482440031256e-06, "loss": 0.6885, "step": 1683 }, { "epoch": 0.11, "grad_norm": 1.6403713459319915, "learning_rate": 9.842224205426094e-06, "loss": 0.8452, "step": 1684 }, { "epoch": 0.11, "grad_norm": 1.762924693395774, "learning_rate": 9.84196576271231e-06, "loss": 0.7181, "step": 1685 }, { "epoch": 0.11, "grad_norm": 1.6563842804769173, "learning_rate": 9.841707111901013e-06, "loss": 0.7342, "step": 1686 }, { "epoch": 0.11, "grad_norm": 1.2575974448779983, "learning_rate": 9.841448253003319e-06, "loss": 0.7643, "step": 1687 }, { "epoch": 0.11, "grad_norm": 1.7677564517542617, "learning_rate": 9.84118918603035e-06, "loss": 0.6714, "step": 1688 }, { "epoch": 0.11, "grad_norm": 1.684569438234531, "learning_rate": 9.840929910993244e-06, "loss": 0.9442, "step": 1689 }, { "epoch": 0.11, "grad_norm": 1.5214816789652723, "learning_rate": 9.840670427903142e-06, "loss": 0.7848, "step": 1690 }, { "epoch": 0.11, "grad_norm": 1.4863487470828645, "learning_rate": 9.840410736771196e-06, "loss": 0.6927, "step": 1691 }, { "epoch": 0.11, "grad_norm": 1.8547840368330744, "learning_rate": 9.840150837608568e-06, "loss": 0.7885, "step": 1692 }, { "epoch": 0.11, "grad_norm": 1.5156172472714058, "learning_rate": 9.839890730426429e-06, "loss": 0.7111, "step": 1693 }, { "epoch": 0.11, "grad_norm": 2.3338784676087596, "learning_rate": 9.839630415235954e-06, "loss": 0.7491, "step": 1694 }, { "epoch": 0.11, "grad_norm": 1.5168466356807258, "learning_rate": 9.839369892048337e-06, "loss": 0.7786, "step": 1695 }, { "epoch": 0.11, "grad_norm": 1.7251681163835793, "learning_rate": 9.839109160874767e-06, "loss": 0.9036, "step": 1696 }, { "epoch": 0.11, "grad_norm": 1.6245293339742353, "learning_rate": 9.838848221726455e-06, "loss": 0.7746, "step": 1697 }, { "epoch": 0.11, "grad_norm": 1.037589368988872, "learning_rate": 9.838587074614614e-06, "loss": 0.6646, "step": 1698 }, { "epoch": 0.11, "grad_norm": 1.5343304113591423, "learning_rate": 9.838325719550469e-06, "loss": 0.7819, "step": 1699 }, { "epoch": 0.11, "grad_norm": 1.5902994163871036, "learning_rate": 9.838064156545251e-06, "loss": 0.7102, "step": 1700 }, { "epoch": 0.11, "grad_norm": 1.6694554054286268, "learning_rate": 9.8378023856102e-06, "loss": 0.6046, "step": 1701 }, { "epoch": 0.11, "grad_norm": 3.5508061567618783, "learning_rate": 9.83754040675657e-06, "loss": 0.7022, "step": 1702 }, { "epoch": 0.11, "grad_norm": 1.562367277810157, "learning_rate": 9.837278219995618e-06, "loss": 0.7386, "step": 1703 }, { "epoch": 0.11, "grad_norm": 1.4999122421612299, "learning_rate": 9.837015825338611e-06, "loss": 0.6689, "step": 1704 }, { "epoch": 0.11, "grad_norm": 1.7522650431484927, "learning_rate": 9.836753222796831e-06, "loss": 0.7917, "step": 1705 }, { "epoch": 0.11, "grad_norm": 1.1243319111592767, "learning_rate": 9.836490412381559e-06, "loss": 0.6738, "step": 1706 }, { "epoch": 0.11, "grad_norm": 1.6477131488863115, "learning_rate": 9.836227394104093e-06, "loss": 0.9196, "step": 1707 }, { "epoch": 0.11, "grad_norm": 1.4883059758808368, "learning_rate": 9.835964167975734e-06, "loss": 0.8053, "step": 1708 }, { "epoch": 0.11, "grad_norm": 1.78686568243373, "learning_rate": 9.835700734007798e-06, "loss": 0.7525, "step": 1709 }, { "epoch": 0.11, "grad_norm": 1.7547311210656305, "learning_rate": 9.835437092211605e-06, "loss": 0.7129, "step": 1710 }, { "epoch": 0.11, "grad_norm": 1.7275295009192229, "learning_rate": 9.835173242598486e-06, "loss": 0.7513, "step": 1711 }, { "epoch": 0.11, "grad_norm": 1.472421083663438, "learning_rate": 9.834909185179782e-06, "loss": 0.8023, "step": 1712 }, { "epoch": 0.11, "grad_norm": 1.5730173000861327, "learning_rate": 9.834644919966842e-06, "loss": 0.7229, "step": 1713 }, { "epoch": 0.11, "grad_norm": 1.8296699129905514, "learning_rate": 9.83438044697102e-06, "loss": 0.7762, "step": 1714 }, { "epoch": 0.11, "grad_norm": 1.5123678595152346, "learning_rate": 9.834115766203687e-06, "loss": 0.7338, "step": 1715 }, { "epoch": 0.11, "grad_norm": 1.6897765765387018, "learning_rate": 9.833850877676215e-06, "loss": 0.8099, "step": 1716 }, { "epoch": 0.11, "grad_norm": 1.6484755921494088, "learning_rate": 9.833585781399989e-06, "loss": 0.8409, "step": 1717 }, { "epoch": 0.11, "grad_norm": 1.5857344926841186, "learning_rate": 9.833320477386403e-06, "loss": 0.7565, "step": 1718 }, { "epoch": 0.11, "grad_norm": 1.6054406389729252, "learning_rate": 9.83305496564686e-06, "loss": 0.7332, "step": 1719 }, { "epoch": 0.11, "grad_norm": 1.7754773647494853, "learning_rate": 9.83278924619277e-06, "loss": 0.7691, "step": 1720 }, { "epoch": 0.11, "grad_norm": 1.6050026894787355, "learning_rate": 9.832523319035553e-06, "loss": 0.6889, "step": 1721 }, { "epoch": 0.11, "grad_norm": 2.0750923245507593, "learning_rate": 9.832257184186638e-06, "loss": 0.9225, "step": 1722 }, { "epoch": 0.11, "grad_norm": 1.5179354090814645, "learning_rate": 9.831990841657465e-06, "loss": 0.8069, "step": 1723 }, { "epoch": 0.11, "grad_norm": 1.6771598368635638, "learning_rate": 9.831724291459477e-06, "loss": 0.7622, "step": 1724 }, { "epoch": 0.11, "grad_norm": 1.6736687137084856, "learning_rate": 9.831457533604133e-06, "loss": 0.6849, "step": 1725 }, { "epoch": 0.11, "grad_norm": 1.615946706754924, "learning_rate": 9.831190568102895e-06, "loss": 0.722, "step": 1726 }, { "epoch": 0.11, "grad_norm": 1.188716285432324, "learning_rate": 9.83092339496724e-06, "loss": 0.6862, "step": 1727 }, { "epoch": 0.11, "grad_norm": 1.5099280694068704, "learning_rate": 9.830656014208648e-06, "loss": 0.8249, "step": 1728 }, { "epoch": 0.11, "grad_norm": 1.6886429544197339, "learning_rate": 9.83038842583861e-06, "loss": 0.7444, "step": 1729 }, { "epoch": 0.11, "grad_norm": 1.3226205463871663, "learning_rate": 9.830120629868628e-06, "loss": 0.7091, "step": 1730 }, { "epoch": 0.11, "grad_norm": 1.810355828193406, "learning_rate": 9.829852626310213e-06, "loss": 0.7027, "step": 1731 }, { "epoch": 0.11, "grad_norm": 1.4826350705921438, "learning_rate": 9.829584415174879e-06, "loss": 0.7615, "step": 1732 }, { "epoch": 0.11, "grad_norm": 1.4791616633816709, "learning_rate": 9.829315996474156e-06, "loss": 0.7444, "step": 1733 }, { "epoch": 0.11, "grad_norm": 1.1516732809158794, "learning_rate": 9.829047370219577e-06, "loss": 0.624, "step": 1734 }, { "epoch": 0.11, "grad_norm": 1.2818671546335272, "learning_rate": 9.828778536422692e-06, "loss": 0.6432, "step": 1735 }, { "epoch": 0.11, "grad_norm": 1.8359191932476573, "learning_rate": 9.828509495095051e-06, "loss": 0.784, "step": 1736 }, { "epoch": 0.11, "grad_norm": 1.605820514939674, "learning_rate": 9.828240246248217e-06, "loss": 0.6566, "step": 1737 }, { "epoch": 0.11, "grad_norm": 1.6839848744446362, "learning_rate": 9.827970789893764e-06, "loss": 0.7384, "step": 1738 }, { "epoch": 0.11, "grad_norm": 1.8287147972838154, "learning_rate": 9.827701126043272e-06, "loss": 0.7191, "step": 1739 }, { "epoch": 0.11, "grad_norm": 1.7312552745159282, "learning_rate": 9.82743125470833e-06, "loss": 0.7869, "step": 1740 }, { "epoch": 0.11, "grad_norm": 1.6793599219092572, "learning_rate": 9.827161175900535e-06, "loss": 0.8401, "step": 1741 }, { "epoch": 0.11, "grad_norm": 1.495919080166114, "learning_rate": 9.826890889631498e-06, "loss": 0.703, "step": 1742 }, { "epoch": 0.11, "grad_norm": 2.0711401815489223, "learning_rate": 9.826620395912832e-06, "loss": 0.6632, "step": 1743 }, { "epoch": 0.11, "grad_norm": 1.5783455535856437, "learning_rate": 9.826349694756163e-06, "loss": 0.6986, "step": 1744 }, { "epoch": 0.11, "grad_norm": 1.534773000515163, "learning_rate": 9.826078786173126e-06, "loss": 0.7146, "step": 1745 }, { "epoch": 0.11, "grad_norm": 1.5328310888958125, "learning_rate": 9.825807670175364e-06, "loss": 0.6982, "step": 1746 }, { "epoch": 0.11, "grad_norm": 1.824263218288273, "learning_rate": 9.82553634677453e-06, "loss": 0.8235, "step": 1747 }, { "epoch": 0.11, "grad_norm": 1.7116621430194352, "learning_rate": 9.82526481598228e-06, "loss": 0.7898, "step": 1748 }, { "epoch": 0.11, "grad_norm": 1.7780764206982795, "learning_rate": 9.82499307781029e-06, "loss": 0.7683, "step": 1749 }, { "epoch": 0.11, "grad_norm": 1.5766805284103003, "learning_rate": 9.824721132270236e-06, "loss": 0.7248, "step": 1750 }, { "epoch": 0.11, "grad_norm": 1.622923432656305, "learning_rate": 9.824448979373807e-06, "loss": 0.8058, "step": 1751 }, { "epoch": 0.11, "grad_norm": 1.7240513636454298, "learning_rate": 9.824176619132698e-06, "loss": 0.7646, "step": 1752 }, { "epoch": 0.11, "grad_norm": 1.8134264984251542, "learning_rate": 9.823904051558613e-06, "loss": 0.7734, "step": 1753 }, { "epoch": 0.11, "grad_norm": 1.092167588329933, "learning_rate": 9.82363127666327e-06, "loss": 0.6668, "step": 1754 }, { "epoch": 0.11, "grad_norm": 1.8729877232942445, "learning_rate": 9.82335829445839e-06, "loss": 0.8707, "step": 1755 }, { "epoch": 0.11, "grad_norm": 1.8088395534608896, "learning_rate": 9.823085104955705e-06, "loss": 0.7967, "step": 1756 }, { "epoch": 0.11, "grad_norm": 1.5869735757346852, "learning_rate": 9.822811708166958e-06, "loss": 0.7915, "step": 1757 }, { "epoch": 0.11, "grad_norm": 1.5964255122633306, "learning_rate": 9.822538104103898e-06, "loss": 0.7505, "step": 1758 }, { "epoch": 0.11, "grad_norm": 1.8656555895060007, "learning_rate": 9.822264292778282e-06, "loss": 0.799, "step": 1759 }, { "epoch": 0.11, "grad_norm": 1.5929989098665693, "learning_rate": 9.821990274201883e-06, "loss": 0.7974, "step": 1760 }, { "epoch": 0.11, "grad_norm": 1.7103181660016327, "learning_rate": 9.821716048386472e-06, "loss": 0.7338, "step": 1761 }, { "epoch": 0.11, "grad_norm": 2.0290137965112915, "learning_rate": 9.821441615343838e-06, "loss": 0.6902, "step": 1762 }, { "epoch": 0.11, "grad_norm": 1.7111371666268231, "learning_rate": 9.821166975085774e-06, "loss": 0.7871, "step": 1763 }, { "epoch": 0.11, "grad_norm": 1.70452549534019, "learning_rate": 9.820892127624085e-06, "loss": 0.8272, "step": 1764 }, { "epoch": 0.11, "grad_norm": 1.5653926163040415, "learning_rate": 9.820617072970583e-06, "loss": 0.7447, "step": 1765 }, { "epoch": 0.11, "grad_norm": 1.5795712572485072, "learning_rate": 9.820341811137085e-06, "loss": 0.7444, "step": 1766 }, { "epoch": 0.11, "grad_norm": 1.6785984659689746, "learning_rate": 9.820066342135428e-06, "loss": 0.7436, "step": 1767 }, { "epoch": 0.11, "grad_norm": 1.766438976642095, "learning_rate": 9.819790665977447e-06, "loss": 0.8308, "step": 1768 }, { "epoch": 0.11, "grad_norm": 1.509862932360156, "learning_rate": 9.819514782674992e-06, "loss": 0.7084, "step": 1769 }, { "epoch": 0.11, "grad_norm": 1.6279662554887826, "learning_rate": 9.819238692239919e-06, "loss": 0.68, "step": 1770 }, { "epoch": 0.11, "grad_norm": 1.4214124471736456, "learning_rate": 9.818962394684094e-06, "loss": 0.704, "step": 1771 }, { "epoch": 0.11, "grad_norm": 1.071121025241116, "learning_rate": 9.818685890019389e-06, "loss": 0.8783, "step": 1772 }, { "epoch": 0.11, "grad_norm": 1.7515798070106208, "learning_rate": 9.818409178257691e-06, "loss": 0.7752, "step": 1773 }, { "epoch": 0.11, "grad_norm": 1.66151923419309, "learning_rate": 9.81813225941089e-06, "loss": 0.7488, "step": 1774 }, { "epoch": 0.11, "grad_norm": 1.6841647971012637, "learning_rate": 9.817855133490891e-06, "loss": 0.7779, "step": 1775 }, { "epoch": 0.11, "grad_norm": 1.5988628569598553, "learning_rate": 9.817577800509601e-06, "loss": 0.7466, "step": 1776 }, { "epoch": 0.11, "grad_norm": 1.121108760898924, "learning_rate": 9.817300260478942e-06, "loss": 0.7304, "step": 1777 }, { "epoch": 0.11, "grad_norm": 1.1420967276189065, "learning_rate": 9.817022513410838e-06, "loss": 0.5972, "step": 1778 }, { "epoch": 0.11, "grad_norm": 1.5322194452829452, "learning_rate": 9.816744559317228e-06, "loss": 0.7132, "step": 1779 }, { "epoch": 0.11, "grad_norm": 1.600834789005954, "learning_rate": 9.81646639821006e-06, "loss": 0.7348, "step": 1780 }, { "epoch": 0.11, "grad_norm": 1.1721587935297584, "learning_rate": 9.816188030101287e-06, "loss": 0.6465, "step": 1781 }, { "epoch": 0.11, "grad_norm": 1.5433530239048896, "learning_rate": 9.815909455002872e-06, "loss": 0.7213, "step": 1782 }, { "epoch": 0.11, "grad_norm": 2.758755080203243, "learning_rate": 9.815630672926789e-06, "loss": 0.8544, "step": 1783 }, { "epoch": 0.11, "grad_norm": 1.677267534396392, "learning_rate": 9.815351683885017e-06, "loss": 0.7945, "step": 1784 }, { "epoch": 0.11, "grad_norm": 1.5625077008889303, "learning_rate": 9.81507248788955e-06, "loss": 0.7578, "step": 1785 }, { "epoch": 0.11, "grad_norm": 1.6972582374672796, "learning_rate": 9.814793084952384e-06, "loss": 0.8226, "step": 1786 }, { "epoch": 0.11, "grad_norm": 1.388137010262355, "learning_rate": 9.814513475085528e-06, "loss": 0.7112, "step": 1787 }, { "epoch": 0.11, "grad_norm": 1.7483951751841922, "learning_rate": 9.814233658301002e-06, "loss": 0.8602, "step": 1788 }, { "epoch": 0.11, "grad_norm": 1.9003302577381969, "learning_rate": 9.813953634610827e-06, "loss": 0.84, "step": 1789 }, { "epoch": 0.11, "grad_norm": 1.6254158013972946, "learning_rate": 9.813673404027042e-06, "loss": 0.6984, "step": 1790 }, { "epoch": 0.11, "grad_norm": 1.3965693596728497, "learning_rate": 9.813392966561688e-06, "loss": 0.7646, "step": 1791 }, { "epoch": 0.11, "grad_norm": 1.6247713887998743, "learning_rate": 9.813112322226819e-06, "loss": 0.7358, "step": 1792 }, { "epoch": 0.11, "grad_norm": 2.2208826991816486, "learning_rate": 9.812831471034495e-06, "loss": 0.742, "step": 1793 }, { "epoch": 0.11, "grad_norm": 1.7127196008542238, "learning_rate": 9.812550412996788e-06, "loss": 0.7913, "step": 1794 }, { "epoch": 0.11, "grad_norm": 1.7017238983406708, "learning_rate": 9.812269148125779e-06, "loss": 0.7183, "step": 1795 }, { "epoch": 0.11, "grad_norm": 1.256282330158656, "learning_rate": 9.811987676433552e-06, "loss": 0.8335, "step": 1796 }, { "epoch": 0.12, "grad_norm": 1.18054580082267, "learning_rate": 9.811705997932206e-06, "loss": 0.6148, "step": 1797 }, { "epoch": 0.12, "grad_norm": 1.6919577980326204, "learning_rate": 9.811424112633847e-06, "loss": 0.8331, "step": 1798 }, { "epoch": 0.12, "grad_norm": 2.342213122920862, "learning_rate": 9.81114202055059e-06, "loss": 0.7286, "step": 1799 }, { "epoch": 0.12, "grad_norm": 1.5561148766934652, "learning_rate": 9.810859721694559e-06, "loss": 0.7311, "step": 1800 }, { "epoch": 0.12, "grad_norm": 1.708189586445991, "learning_rate": 9.810577216077886e-06, "loss": 0.726, "step": 1801 }, { "epoch": 0.12, "grad_norm": 1.6516196280291058, "learning_rate": 9.810294503712711e-06, "loss": 0.8235, "step": 1802 }, { "epoch": 0.12, "grad_norm": 1.7230834881238792, "learning_rate": 9.810011584611189e-06, "loss": 0.8429, "step": 1803 }, { "epoch": 0.12, "grad_norm": 1.7761105094764889, "learning_rate": 9.809728458785474e-06, "loss": 0.8008, "step": 1804 }, { "epoch": 0.12, "grad_norm": 1.6241687966989518, "learning_rate": 9.80944512624774e-06, "loss": 0.7131, "step": 1805 }, { "epoch": 0.12, "grad_norm": 1.726113878719947, "learning_rate": 9.809161587010156e-06, "loss": 0.6942, "step": 1806 }, { "epoch": 0.12, "grad_norm": 1.077212197495839, "learning_rate": 9.808877841084915e-06, "loss": 0.6482, "step": 1807 }, { "epoch": 0.12, "grad_norm": 1.4487663122341208, "learning_rate": 9.808593888484207e-06, "loss": 0.7797, "step": 1808 }, { "epoch": 0.12, "grad_norm": 1.487162408596356, "learning_rate": 9.808309729220241e-06, "loss": 0.78, "step": 1809 }, { "epoch": 0.12, "grad_norm": 1.0242366695661673, "learning_rate": 9.808025363305225e-06, "loss": 0.6142, "step": 1810 }, { "epoch": 0.12, "grad_norm": 1.6526510150688352, "learning_rate": 9.807740790751383e-06, "loss": 0.8136, "step": 1811 }, { "epoch": 0.12, "grad_norm": 1.5773034850932452, "learning_rate": 9.807456011570941e-06, "loss": 0.7669, "step": 1812 }, { "epoch": 0.12, "grad_norm": 1.5901864869792877, "learning_rate": 9.807171025776145e-06, "loss": 0.64, "step": 1813 }, { "epoch": 0.12, "grad_norm": 1.8526622635921155, "learning_rate": 9.806885833379238e-06, "loss": 0.7564, "step": 1814 }, { "epoch": 0.12, "grad_norm": 1.6056653908537788, "learning_rate": 9.806600434392479e-06, "loss": 0.834, "step": 1815 }, { "epoch": 0.12, "grad_norm": 1.8606843634901997, "learning_rate": 9.806314828828134e-06, "loss": 0.7306, "step": 1816 }, { "epoch": 0.12, "grad_norm": 1.5367685360177774, "learning_rate": 9.806029016698475e-06, "loss": 0.8044, "step": 1817 }, { "epoch": 0.12, "grad_norm": 1.1796633907189775, "learning_rate": 9.80574299801579e-06, "loss": 0.6753, "step": 1818 }, { "epoch": 0.12, "grad_norm": 1.6227119337587903, "learning_rate": 9.805456772792367e-06, "loss": 0.8105, "step": 1819 }, { "epoch": 0.12, "grad_norm": 1.6431299853209114, "learning_rate": 9.805170341040512e-06, "loss": 0.7246, "step": 1820 }, { "epoch": 0.12, "grad_norm": 1.9432693939233514, "learning_rate": 9.804883702772532e-06, "loss": 0.792, "step": 1821 }, { "epoch": 0.12, "grad_norm": 2.212408108802079, "learning_rate": 9.804596858000745e-06, "loss": 0.8394, "step": 1822 }, { "epoch": 0.12, "grad_norm": 1.6106216261607782, "learning_rate": 9.804309806737482e-06, "loss": 0.8265, "step": 1823 }, { "epoch": 0.12, "grad_norm": 1.426099956560461, "learning_rate": 9.80402254899508e-06, "loss": 0.7053, "step": 1824 }, { "epoch": 0.12, "grad_norm": 1.6659796264141395, "learning_rate": 9.803735084785884e-06, "loss": 0.7574, "step": 1825 }, { "epoch": 0.12, "grad_norm": 1.4704753092235854, "learning_rate": 9.803447414122245e-06, "loss": 0.7606, "step": 1826 }, { "epoch": 0.12, "grad_norm": 1.5402721872283325, "learning_rate": 9.803159537016533e-06, "loss": 0.7607, "step": 1827 }, { "epoch": 0.12, "grad_norm": 1.1665975526588328, "learning_rate": 9.802871453481114e-06, "loss": 0.6337, "step": 1828 }, { "epoch": 0.12, "grad_norm": 1.0962179976292328, "learning_rate": 9.802583163528374e-06, "loss": 0.6683, "step": 1829 }, { "epoch": 0.12, "grad_norm": 1.7469772611819745, "learning_rate": 9.8022946671707e-06, "loss": 0.8379, "step": 1830 }, { "epoch": 0.12, "grad_norm": 1.0569440181733583, "learning_rate": 9.802005964420493e-06, "loss": 0.6794, "step": 1831 }, { "epoch": 0.12, "grad_norm": 1.7413715363135018, "learning_rate": 9.801717055290162e-06, "loss": 0.7126, "step": 1832 }, { "epoch": 0.12, "grad_norm": 1.696102213938122, "learning_rate": 9.801427939792118e-06, "loss": 0.7319, "step": 1833 }, { "epoch": 0.12, "grad_norm": 2.227729747460165, "learning_rate": 9.801138617938793e-06, "loss": 0.7245, "step": 1834 }, { "epoch": 0.12, "grad_norm": 1.5872619344630223, "learning_rate": 9.800849089742617e-06, "loss": 0.7401, "step": 1835 }, { "epoch": 0.12, "grad_norm": 1.5813580962124565, "learning_rate": 9.800559355216035e-06, "loss": 0.8112, "step": 1836 }, { "epoch": 0.12, "grad_norm": 1.4733939656520951, "learning_rate": 9.800269414371501e-06, "loss": 0.6994, "step": 1837 }, { "epoch": 0.12, "grad_norm": 1.717550287713349, "learning_rate": 9.799979267221473e-06, "loss": 0.8325, "step": 1838 }, { "epoch": 0.12, "grad_norm": 1.8110524518062983, "learning_rate": 9.799688913778423e-06, "loss": 0.7601, "step": 1839 }, { "epoch": 0.12, "grad_norm": 1.8217530717893298, "learning_rate": 9.799398354054828e-06, "loss": 0.7366, "step": 1840 }, { "epoch": 0.12, "grad_norm": 1.699441897142735, "learning_rate": 9.799107588063178e-06, "loss": 0.8556, "step": 1841 }, { "epoch": 0.12, "grad_norm": 2.1866184210203503, "learning_rate": 9.798816615815967e-06, "loss": 0.897, "step": 1842 }, { "epoch": 0.12, "grad_norm": 1.830331614655377, "learning_rate": 9.798525437325704e-06, "loss": 0.7436, "step": 1843 }, { "epoch": 0.12, "grad_norm": 1.3858556469402894, "learning_rate": 9.798234052604898e-06, "loss": 0.7597, "step": 1844 }, { "epoch": 0.12, "grad_norm": 1.8535423934163855, "learning_rate": 9.797942461666075e-06, "loss": 0.8805, "step": 1845 }, { "epoch": 0.12, "grad_norm": 1.3784109912948543, "learning_rate": 9.797650664521768e-06, "loss": 0.6607, "step": 1846 }, { "epoch": 0.12, "grad_norm": 1.59440943497434, "learning_rate": 9.797358661184517e-06, "loss": 0.7202, "step": 1847 }, { "epoch": 0.12, "grad_norm": 1.5885891503283773, "learning_rate": 9.79706645166687e-06, "loss": 0.7724, "step": 1848 }, { "epoch": 0.12, "grad_norm": 1.5017559411840176, "learning_rate": 9.796774035981388e-06, "loss": 0.7289, "step": 1849 }, { "epoch": 0.12, "grad_norm": 1.6559753794812664, "learning_rate": 9.796481414140637e-06, "loss": 0.7693, "step": 1850 }, { "epoch": 0.12, "grad_norm": 1.5131576251124113, "learning_rate": 9.796188586157194e-06, "loss": 0.7617, "step": 1851 }, { "epoch": 0.12, "grad_norm": 1.838361894515427, "learning_rate": 9.795895552043644e-06, "loss": 0.6536, "step": 1852 }, { "epoch": 0.12, "grad_norm": 1.542785793425092, "learning_rate": 9.79560231181258e-06, "loss": 0.7785, "step": 1853 }, { "epoch": 0.12, "grad_norm": 1.5934476016996213, "learning_rate": 9.795308865476605e-06, "loss": 0.7293, "step": 1854 }, { "epoch": 0.12, "grad_norm": 1.6294707745878267, "learning_rate": 9.795015213048334e-06, "loss": 0.7908, "step": 1855 }, { "epoch": 0.12, "grad_norm": 1.1477575829291133, "learning_rate": 9.794721354540382e-06, "loss": 0.6211, "step": 1856 }, { "epoch": 0.12, "grad_norm": 1.6123924860061603, "learning_rate": 9.794427289965383e-06, "loss": 0.6774, "step": 1857 }, { "epoch": 0.12, "grad_norm": 1.6802669978484641, "learning_rate": 9.794133019335972e-06, "loss": 0.7355, "step": 1858 }, { "epoch": 0.12, "grad_norm": 1.6497107033687868, "learning_rate": 9.793838542664797e-06, "loss": 0.783, "step": 1859 }, { "epoch": 0.12, "grad_norm": 1.5180402926627337, "learning_rate": 9.793543859964518e-06, "loss": 0.7408, "step": 1860 }, { "epoch": 0.12, "grad_norm": 1.6482325600500183, "learning_rate": 9.793248971247792e-06, "loss": 0.7154, "step": 1861 }, { "epoch": 0.12, "grad_norm": 1.7403669581533932, "learning_rate": 9.792953876527301e-06, "loss": 0.8429, "step": 1862 }, { "epoch": 0.12, "grad_norm": 1.546277751816079, "learning_rate": 9.792658575815724e-06, "loss": 0.73, "step": 1863 }, { "epoch": 0.12, "grad_norm": 1.6414164926679675, "learning_rate": 9.792363069125749e-06, "loss": 0.6568, "step": 1864 }, { "epoch": 0.12, "grad_norm": 1.5320468984336972, "learning_rate": 9.792067356470083e-06, "loss": 0.7255, "step": 1865 }, { "epoch": 0.12, "grad_norm": 1.823941637294585, "learning_rate": 9.79177143786143e-06, "loss": 0.6394, "step": 1866 }, { "epoch": 0.12, "grad_norm": 1.7702291587207812, "learning_rate": 9.791475313312508e-06, "loss": 0.8504, "step": 1867 }, { "epoch": 0.12, "grad_norm": 1.5906187605614313, "learning_rate": 9.791178982836048e-06, "loss": 0.7441, "step": 1868 }, { "epoch": 0.12, "grad_norm": 1.8330933517929286, "learning_rate": 9.79088244644478e-06, "loss": 0.811, "step": 1869 }, { "epoch": 0.12, "grad_norm": 1.1150217840784726, "learning_rate": 9.790585704151453e-06, "loss": 0.6408, "step": 1870 }, { "epoch": 0.12, "grad_norm": 1.660980421799879, "learning_rate": 9.79028875596882e-06, "loss": 0.7124, "step": 1871 }, { "epoch": 0.12, "grad_norm": 1.5376975188831012, "learning_rate": 9.78999160190964e-06, "loss": 0.7682, "step": 1872 }, { "epoch": 0.12, "grad_norm": 1.7527282710641499, "learning_rate": 9.789694241986687e-06, "loss": 0.7775, "step": 1873 }, { "epoch": 0.12, "grad_norm": 1.6386334048853892, "learning_rate": 9.789396676212742e-06, "loss": 0.79, "step": 1874 }, { "epoch": 0.12, "grad_norm": 1.6425999537122586, "learning_rate": 9.789098904600588e-06, "loss": 0.7874, "step": 1875 }, { "epoch": 0.12, "grad_norm": 1.6759328842979109, "learning_rate": 9.788800927163028e-06, "loss": 0.7574, "step": 1876 }, { "epoch": 0.12, "grad_norm": 1.607321710638501, "learning_rate": 9.788502743912866e-06, "loss": 0.6737, "step": 1877 }, { "epoch": 0.12, "grad_norm": 1.1266655364683689, "learning_rate": 9.78820435486292e-06, "loss": 0.6364, "step": 1878 }, { "epoch": 0.12, "grad_norm": 1.4242714850287244, "learning_rate": 9.787905760026011e-06, "loss": 0.7609, "step": 1879 }, { "epoch": 0.12, "grad_norm": 1.2089999721515066, "learning_rate": 9.787606959414975e-06, "loss": 0.69, "step": 1880 }, { "epoch": 0.12, "grad_norm": 1.5090264965410602, "learning_rate": 9.78730795304265e-06, "loss": 0.7148, "step": 1881 }, { "epoch": 0.12, "grad_norm": 1.6595989395106714, "learning_rate": 9.787008740921889e-06, "loss": 0.7711, "step": 1882 }, { "epoch": 0.12, "grad_norm": 1.5547035355367027, "learning_rate": 9.786709323065553e-06, "loss": 0.7462, "step": 1883 }, { "epoch": 0.12, "grad_norm": 1.6431737928562735, "learning_rate": 9.786409699486506e-06, "loss": 0.7055, "step": 1884 }, { "epoch": 0.12, "grad_norm": 2.001283792950453, "learning_rate": 9.786109870197629e-06, "loss": 0.8351, "step": 1885 }, { "epoch": 0.12, "grad_norm": 1.6091229766195723, "learning_rate": 9.785809835211806e-06, "loss": 0.8126, "step": 1886 }, { "epoch": 0.12, "grad_norm": 1.7093441165484555, "learning_rate": 9.785509594541934e-06, "loss": 0.7701, "step": 1887 }, { "epoch": 0.12, "grad_norm": 1.8200286155568788, "learning_rate": 9.785209148200913e-06, "loss": 0.6997, "step": 1888 }, { "epoch": 0.12, "grad_norm": 1.685830040051834, "learning_rate": 9.78490849620166e-06, "loss": 0.8567, "step": 1889 }, { "epoch": 0.12, "grad_norm": 1.7034572313848138, "learning_rate": 9.784607638557094e-06, "loss": 0.6905, "step": 1890 }, { "epoch": 0.12, "grad_norm": 1.605122359851703, "learning_rate": 9.784306575280146e-06, "loss": 0.7804, "step": 1891 }, { "epoch": 0.12, "grad_norm": 1.3056898363269527, "learning_rate": 9.784005306383754e-06, "loss": 0.7697, "step": 1892 }, { "epoch": 0.12, "grad_norm": 1.606240332991692, "learning_rate": 9.783703831880867e-06, "loss": 0.7594, "step": 1893 }, { "epoch": 0.12, "grad_norm": 1.5485160344974753, "learning_rate": 9.78340215178444e-06, "loss": 0.7501, "step": 1894 }, { "epoch": 0.12, "grad_norm": 1.7438264453662415, "learning_rate": 9.78310026610744e-06, "loss": 0.7549, "step": 1895 }, { "epoch": 0.12, "grad_norm": 1.616555185014795, "learning_rate": 9.782798174862842e-06, "loss": 0.8857, "step": 1896 }, { "epoch": 0.12, "grad_norm": 1.3570510117202035, "learning_rate": 9.78249587806363e-06, "loss": 0.6491, "step": 1897 }, { "epoch": 0.12, "grad_norm": 2.2113567045265072, "learning_rate": 9.782193375722792e-06, "loss": 0.8994, "step": 1898 }, { "epoch": 0.12, "grad_norm": 1.0793843101473977, "learning_rate": 9.78189066785333e-06, "loss": 0.6124, "step": 1899 }, { "epoch": 0.12, "grad_norm": 1.5701285887793015, "learning_rate": 9.78158775446826e-06, "loss": 0.7255, "step": 1900 }, { "epoch": 0.12, "grad_norm": 2.097179799968141, "learning_rate": 9.781284635580593e-06, "loss": 0.7395, "step": 1901 }, { "epoch": 0.12, "grad_norm": 1.620870543392498, "learning_rate": 9.780981311203359e-06, "loss": 0.7456, "step": 1902 }, { "epoch": 0.12, "grad_norm": 1.651076250829252, "learning_rate": 9.780677781349594e-06, "loss": 0.8006, "step": 1903 }, { "epoch": 0.12, "grad_norm": 1.7537333531320811, "learning_rate": 9.780374046032345e-06, "loss": 0.8141, "step": 1904 }, { "epoch": 0.12, "grad_norm": 1.5526982274545227, "learning_rate": 9.780070105264665e-06, "loss": 0.6848, "step": 1905 }, { "epoch": 0.12, "grad_norm": 1.6251402641218666, "learning_rate": 9.779765959059616e-06, "loss": 0.7429, "step": 1906 }, { "epoch": 0.12, "grad_norm": 1.1938267367035513, "learning_rate": 9.779461607430268e-06, "loss": 0.6447, "step": 1907 }, { "epoch": 0.12, "grad_norm": 1.1023020238141588, "learning_rate": 9.779157050389706e-06, "loss": 0.6516, "step": 1908 }, { "epoch": 0.12, "grad_norm": 1.7202169296180616, "learning_rate": 9.778852287951013e-06, "loss": 0.823, "step": 1909 }, { "epoch": 0.12, "grad_norm": 1.5180016199624191, "learning_rate": 9.778547320127291e-06, "loss": 0.831, "step": 1910 }, { "epoch": 0.12, "grad_norm": 1.6341077823032129, "learning_rate": 9.77824214693165e-06, "loss": 0.8005, "step": 1911 }, { "epoch": 0.12, "grad_norm": 1.6607751777587831, "learning_rate": 9.777936768377197e-06, "loss": 0.736, "step": 1912 }, { "epoch": 0.12, "grad_norm": 1.4864403057554558, "learning_rate": 9.777631184477065e-06, "loss": 0.7564, "step": 1913 }, { "epoch": 0.12, "grad_norm": 1.602016645000965, "learning_rate": 9.777325395244381e-06, "loss": 0.7111, "step": 1914 }, { "epoch": 0.12, "grad_norm": 1.8609130312880107, "learning_rate": 9.777019400692292e-06, "loss": 0.8239, "step": 1915 }, { "epoch": 0.12, "grad_norm": 1.5724694339089875, "learning_rate": 9.776713200833948e-06, "loss": 0.847, "step": 1916 }, { "epoch": 0.12, "grad_norm": 1.7145928214148831, "learning_rate": 9.776406795682506e-06, "loss": 0.7694, "step": 1917 }, { "epoch": 0.12, "grad_norm": 1.4327315331372232, "learning_rate": 9.776100185251138e-06, "loss": 0.7344, "step": 1918 }, { "epoch": 0.12, "grad_norm": 1.7473899958747783, "learning_rate": 9.775793369553019e-06, "loss": 0.7818, "step": 1919 }, { "epoch": 0.12, "grad_norm": 1.7480792174823194, "learning_rate": 9.775486348601336e-06, "loss": 0.844, "step": 1920 }, { "epoch": 0.12, "grad_norm": 1.0599318484183478, "learning_rate": 9.775179122409283e-06, "loss": 0.6809, "step": 1921 }, { "epoch": 0.12, "grad_norm": 1.6143786268672875, "learning_rate": 9.774871690990069e-06, "loss": 0.8047, "step": 1922 }, { "epoch": 0.12, "grad_norm": 2.203937014524049, "learning_rate": 9.7745640543569e-06, "loss": 0.7634, "step": 1923 }, { "epoch": 0.12, "grad_norm": 1.4906472813917724, "learning_rate": 9.774256212523003e-06, "loss": 0.7634, "step": 1924 }, { "epoch": 0.12, "grad_norm": 1.8623576783719333, "learning_rate": 9.773948165501606e-06, "loss": 0.8689, "step": 1925 }, { "epoch": 0.12, "grad_norm": 3.0174558476367914, "learning_rate": 9.773639913305946e-06, "loss": 0.7555, "step": 1926 }, { "epoch": 0.12, "grad_norm": 1.450146197130209, "learning_rate": 9.773331455949275e-06, "loss": 0.7528, "step": 1927 }, { "epoch": 0.12, "grad_norm": 1.623686191426303, "learning_rate": 9.773022793444848e-06, "loss": 0.7233, "step": 1928 }, { "epoch": 0.12, "grad_norm": 1.6740905605777017, "learning_rate": 9.772713925805932e-06, "loss": 0.69, "step": 1929 }, { "epoch": 0.12, "grad_norm": 1.4404279402138958, "learning_rate": 9.772404853045798e-06, "loss": 0.7416, "step": 1930 }, { "epoch": 0.12, "grad_norm": 1.6491776852921902, "learning_rate": 9.772095575177733e-06, "loss": 0.6745, "step": 1931 }, { "epoch": 0.12, "grad_norm": 1.370319474923888, "learning_rate": 9.771786092215027e-06, "loss": 0.6837, "step": 1932 }, { "epoch": 0.12, "grad_norm": 1.6780373745675472, "learning_rate": 9.771476404170984e-06, "loss": 0.6952, "step": 1933 }, { "epoch": 0.12, "grad_norm": 1.8069628446358836, "learning_rate": 9.771166511058908e-06, "loss": 0.8677, "step": 1934 }, { "epoch": 0.12, "grad_norm": 1.3239956633423373, "learning_rate": 9.770856412892122e-06, "loss": 0.7027, "step": 1935 }, { "epoch": 0.12, "grad_norm": 1.6573580884438033, "learning_rate": 9.770546109683953e-06, "loss": 0.8379, "step": 1936 }, { "epoch": 0.12, "grad_norm": 1.6247364507942599, "learning_rate": 9.770235601447737e-06, "loss": 0.7008, "step": 1937 }, { "epoch": 0.12, "grad_norm": 1.5938690807367766, "learning_rate": 9.769924888196817e-06, "loss": 0.759, "step": 1938 }, { "epoch": 0.12, "grad_norm": 1.7388062195710356, "learning_rate": 9.769613969944549e-06, "loss": 0.8049, "step": 1939 }, { "epoch": 0.12, "grad_norm": 1.99388842891875, "learning_rate": 9.769302846704295e-06, "loss": 0.7515, "step": 1940 }, { "epoch": 0.12, "grad_norm": 1.8380150299259042, "learning_rate": 9.768991518489427e-06, "loss": 0.7885, "step": 1941 }, { "epoch": 0.12, "grad_norm": 2.1982924378820137, "learning_rate": 9.768679985313323e-06, "loss": 0.7703, "step": 1942 }, { "epoch": 0.12, "grad_norm": 1.0395876375450652, "learning_rate": 9.768368247189375e-06, "loss": 0.675, "step": 1943 }, { "epoch": 0.12, "grad_norm": 1.5035830363956268, "learning_rate": 9.76805630413098e-06, "loss": 0.7902, "step": 1944 }, { "epoch": 0.12, "grad_norm": 1.4203823490737046, "learning_rate": 9.767744156151542e-06, "loss": 0.7776, "step": 1945 }, { "epoch": 0.12, "grad_norm": 1.6785127476796022, "learning_rate": 9.76743180326448e-06, "loss": 0.7756, "step": 1946 }, { "epoch": 0.12, "grad_norm": 1.6799167545569442, "learning_rate": 9.767119245483217e-06, "loss": 0.6958, "step": 1947 }, { "epoch": 0.12, "grad_norm": 1.6665175365064946, "learning_rate": 9.766806482821185e-06, "loss": 0.7868, "step": 1948 }, { "epoch": 0.12, "grad_norm": 1.4620010904595817, "learning_rate": 9.766493515291827e-06, "loss": 0.645, "step": 1949 }, { "epoch": 0.12, "grad_norm": 1.080454954699842, "learning_rate": 9.766180342908593e-06, "loss": 0.6988, "step": 1950 }, { "epoch": 0.12, "grad_norm": 1.6097655602851133, "learning_rate": 9.765866965684944e-06, "loss": 0.8375, "step": 1951 }, { "epoch": 0.12, "grad_norm": 1.8055482000743381, "learning_rate": 9.765553383634347e-06, "loss": 0.7564, "step": 1952 }, { "epoch": 0.13, "grad_norm": 3.4342654941878026, "learning_rate": 9.76523959677028e-06, "loss": 0.8106, "step": 1953 }, { "epoch": 0.13, "grad_norm": 1.6854316277979073, "learning_rate": 9.764925605106228e-06, "loss": 0.7563, "step": 1954 }, { "epoch": 0.13, "grad_norm": 1.1960112624729036, "learning_rate": 9.764611408655687e-06, "loss": 0.6533, "step": 1955 }, { "epoch": 0.13, "grad_norm": 1.6896622489955293, "learning_rate": 9.764297007432158e-06, "loss": 0.6916, "step": 1956 }, { "epoch": 0.13, "grad_norm": 1.524569082476449, "learning_rate": 9.763982401449154e-06, "loss": 0.7951, "step": 1957 }, { "epoch": 0.13, "grad_norm": 1.1910743539611852, "learning_rate": 9.763667590720197e-06, "loss": 0.6458, "step": 1958 }, { "epoch": 0.13, "grad_norm": 1.8420770140287355, "learning_rate": 9.763352575258817e-06, "loss": 0.8978, "step": 1959 }, { "epoch": 0.13, "grad_norm": 1.6643832645078966, "learning_rate": 9.763037355078553e-06, "loss": 0.7001, "step": 1960 }, { "epoch": 0.13, "grad_norm": 1.0084265787125786, "learning_rate": 9.762721930192953e-06, "loss": 0.6928, "step": 1961 }, { "epoch": 0.13, "grad_norm": 1.6983066157616467, "learning_rate": 9.762406300615572e-06, "loss": 0.7011, "step": 1962 }, { "epoch": 0.13, "grad_norm": 1.6980708068226438, "learning_rate": 9.762090466359975e-06, "loss": 0.7954, "step": 1963 }, { "epoch": 0.13, "grad_norm": 1.6412582616555844, "learning_rate": 9.761774427439736e-06, "loss": 0.7118, "step": 1964 }, { "epoch": 0.13, "grad_norm": 2.0185861883683494, "learning_rate": 9.761458183868437e-06, "loss": 0.7548, "step": 1965 }, { "epoch": 0.13, "grad_norm": 1.7199567715267698, "learning_rate": 9.761141735659671e-06, "loss": 0.798, "step": 1966 }, { "epoch": 0.13, "grad_norm": 1.1249209197741517, "learning_rate": 9.760825082827039e-06, "loss": 0.6432, "step": 1967 }, { "epoch": 0.13, "grad_norm": 1.7173252659837173, "learning_rate": 9.760508225384149e-06, "loss": 0.7007, "step": 1968 }, { "epoch": 0.13, "grad_norm": 1.845337807685598, "learning_rate": 9.760191163344617e-06, "loss": 0.7458, "step": 1969 }, { "epoch": 0.13, "grad_norm": 1.126671226057601, "learning_rate": 9.759873896722072e-06, "loss": 0.7142, "step": 1970 }, { "epoch": 0.13, "grad_norm": 1.185994479350096, "learning_rate": 9.75955642553015e-06, "loss": 0.6329, "step": 1971 }, { "epoch": 0.13, "grad_norm": 1.563183101060908, "learning_rate": 9.759238749782491e-06, "loss": 0.7266, "step": 1972 }, { "epoch": 0.13, "grad_norm": 1.6654362235697189, "learning_rate": 9.758920869492755e-06, "loss": 0.8691, "step": 1973 }, { "epoch": 0.13, "grad_norm": 1.5286395037886178, "learning_rate": 9.758602784674597e-06, "loss": 0.8081, "step": 1974 }, { "epoch": 0.13, "grad_norm": 1.2893864588067039, "learning_rate": 9.758284495341692e-06, "loss": 0.6019, "step": 1975 }, { "epoch": 0.13, "grad_norm": 1.570679004663212, "learning_rate": 9.757966001507718e-06, "loss": 0.7991, "step": 1976 }, { "epoch": 0.13, "grad_norm": 1.8482751619804043, "learning_rate": 9.757647303186361e-06, "loss": 0.8112, "step": 1977 }, { "epoch": 0.13, "grad_norm": 1.3746730386806385, "learning_rate": 9.757328400391321e-06, "loss": 0.6759, "step": 1978 }, { "epoch": 0.13, "grad_norm": 1.7441152842693532, "learning_rate": 9.757009293136303e-06, "loss": 0.7694, "step": 1979 }, { "epoch": 0.13, "grad_norm": 1.2362155977139462, "learning_rate": 9.756689981435024e-06, "loss": 0.7989, "step": 1980 }, { "epoch": 0.13, "grad_norm": 1.6743397518335557, "learning_rate": 9.756370465301202e-06, "loss": 0.7675, "step": 1981 }, { "epoch": 0.13, "grad_norm": 1.5564475391834964, "learning_rate": 9.756050744748574e-06, "loss": 0.7578, "step": 1982 }, { "epoch": 0.13, "grad_norm": 1.634475084910378, "learning_rate": 9.755730819790876e-06, "loss": 0.7069, "step": 1983 }, { "epoch": 0.13, "grad_norm": 1.5203108584930085, "learning_rate": 9.755410690441862e-06, "loss": 0.7476, "step": 1984 }, { "epoch": 0.13, "grad_norm": 1.2306583623099812, "learning_rate": 9.755090356715288e-06, "loss": 0.6584, "step": 1985 }, { "epoch": 0.13, "grad_norm": 3.4603111269412925, "learning_rate": 9.754769818624925e-06, "loss": 0.7318, "step": 1986 }, { "epoch": 0.13, "grad_norm": 1.4809936873885894, "learning_rate": 9.754449076184544e-06, "loss": 0.7226, "step": 1987 }, { "epoch": 0.13, "grad_norm": 2.020116063294601, "learning_rate": 9.754128129407932e-06, "loss": 0.7129, "step": 1988 }, { "epoch": 0.13, "grad_norm": 1.42579468678054, "learning_rate": 9.753806978308884e-06, "loss": 0.7848, "step": 1989 }, { "epoch": 0.13, "grad_norm": 1.7533696907123015, "learning_rate": 9.7534856229012e-06, "loss": 0.7815, "step": 1990 }, { "epoch": 0.13, "grad_norm": 1.606763952595462, "learning_rate": 9.753164063198693e-06, "loss": 0.7803, "step": 1991 }, { "epoch": 0.13, "grad_norm": 2.0246171669858626, "learning_rate": 9.752842299215184e-06, "loss": 0.6916, "step": 1992 }, { "epoch": 0.13, "grad_norm": 1.66886085075487, "learning_rate": 9.752520330964497e-06, "loss": 0.8382, "step": 1993 }, { "epoch": 0.13, "grad_norm": 1.6101398486312233, "learning_rate": 9.752198158460475e-06, "loss": 0.896, "step": 1994 }, { "epoch": 0.13, "grad_norm": 1.921291908985781, "learning_rate": 9.75187578171696e-06, "loss": 0.7618, "step": 1995 }, { "epoch": 0.13, "grad_norm": 1.5381801183107724, "learning_rate": 9.751553200747811e-06, "loss": 0.6953, "step": 1996 }, { "epoch": 0.13, "grad_norm": 1.4446758004764493, "learning_rate": 9.751230415566888e-06, "loss": 0.7882, "step": 1997 }, { "epoch": 0.13, "grad_norm": 1.7337141370375113, "learning_rate": 9.750907426188067e-06, "loss": 0.7229, "step": 1998 }, { "epoch": 0.13, "grad_norm": 1.7601354194269925, "learning_rate": 9.750584232625226e-06, "loss": 0.7754, "step": 1999 }, { "epoch": 0.13, "grad_norm": 1.48138770235886, "learning_rate": 9.750260834892259e-06, "loss": 0.7194, "step": 2000 }, { "epoch": 0.13, "grad_norm": 1.1362160719753867, "learning_rate": 9.749937233003062e-06, "loss": 0.7371, "step": 2001 }, { "epoch": 0.13, "grad_norm": 1.6994296411531802, "learning_rate": 9.749613426971544e-06, "loss": 0.8029, "step": 2002 }, { "epoch": 0.13, "grad_norm": 1.8166361739698578, "learning_rate": 9.749289416811622e-06, "loss": 0.7019, "step": 2003 }, { "epoch": 0.13, "grad_norm": 1.8740569112957863, "learning_rate": 9.74896520253722e-06, "loss": 0.7527, "step": 2004 }, { "epoch": 0.13, "grad_norm": 1.343021581834484, "learning_rate": 9.74864078416227e-06, "loss": 0.6838, "step": 2005 }, { "epoch": 0.13, "grad_norm": 14.293932347000604, "learning_rate": 9.74831616170072e-06, "loss": 0.7648, "step": 2006 }, { "epoch": 0.13, "grad_norm": 1.7350773442609875, "learning_rate": 9.747991335166518e-06, "loss": 0.8418, "step": 2007 }, { "epoch": 0.13, "grad_norm": 2.1024853615096903, "learning_rate": 9.747666304573626e-06, "loss": 0.888, "step": 2008 }, { "epoch": 0.13, "grad_norm": 1.44561388574355, "learning_rate": 9.747341069936013e-06, "loss": 0.6547, "step": 2009 }, { "epoch": 0.13, "grad_norm": 1.6837288999765654, "learning_rate": 9.747015631267656e-06, "loss": 0.7867, "step": 2010 }, { "epoch": 0.13, "grad_norm": 1.7446102264515062, "learning_rate": 9.74668998858254e-06, "loss": 0.8452, "step": 2011 }, { "epoch": 0.13, "grad_norm": 2.568222987193163, "learning_rate": 9.746364141894665e-06, "loss": 0.7412, "step": 2012 }, { "epoch": 0.13, "grad_norm": 1.68716191578418, "learning_rate": 9.746038091218032e-06, "loss": 0.817, "step": 2013 }, { "epoch": 0.13, "grad_norm": 1.6064395730167116, "learning_rate": 9.745711836566653e-06, "loss": 0.7802, "step": 2014 }, { "epoch": 0.13, "grad_norm": 1.500363160263568, "learning_rate": 9.745385377954555e-06, "loss": 0.7954, "step": 2015 }, { "epoch": 0.13, "grad_norm": 1.7101396251370988, "learning_rate": 9.745058715395761e-06, "loss": 0.7995, "step": 2016 }, { "epoch": 0.13, "grad_norm": 1.6050381148484132, "learning_rate": 9.744731848904316e-06, "loss": 0.7174, "step": 2017 }, { "epoch": 0.13, "grad_norm": 1.659766347292961, "learning_rate": 9.744404778494267e-06, "loss": 0.7795, "step": 2018 }, { "epoch": 0.13, "grad_norm": 1.901289434902971, "learning_rate": 9.74407750417967e-06, "loss": 0.843, "step": 2019 }, { "epoch": 0.13, "grad_norm": 1.761624209350893, "learning_rate": 9.743750025974587e-06, "loss": 0.7051, "step": 2020 }, { "epoch": 0.13, "grad_norm": 1.6275249350992043, "learning_rate": 9.743422343893099e-06, "loss": 0.7167, "step": 2021 }, { "epoch": 0.13, "grad_norm": 1.135126644786556, "learning_rate": 9.743094457949286e-06, "loss": 0.6498, "step": 2022 }, { "epoch": 0.13, "grad_norm": 1.725033156959717, "learning_rate": 9.742766368157239e-06, "loss": 0.8215, "step": 2023 }, { "epoch": 0.13, "grad_norm": 1.4839499312837647, "learning_rate": 9.74243807453106e-06, "loss": 0.705, "step": 2024 }, { "epoch": 0.13, "grad_norm": 1.6601340881617088, "learning_rate": 9.742109577084857e-06, "loss": 0.7154, "step": 2025 }, { "epoch": 0.13, "grad_norm": 1.7798260150517577, "learning_rate": 9.74178087583275e-06, "loss": 0.8013, "step": 2026 }, { "epoch": 0.13, "grad_norm": 1.7154700930841893, "learning_rate": 9.741451970788863e-06, "loss": 0.7691, "step": 2027 }, { "epoch": 0.13, "grad_norm": 1.4951000385286253, "learning_rate": 9.741122861967335e-06, "loss": 0.7447, "step": 2028 }, { "epoch": 0.13, "grad_norm": 2.093813260020863, "learning_rate": 9.740793549382308e-06, "loss": 0.7058, "step": 2029 }, { "epoch": 0.13, "grad_norm": 1.6577950710520684, "learning_rate": 9.740464033047937e-06, "loss": 0.7861, "step": 2030 }, { "epoch": 0.13, "grad_norm": 1.6451005381825419, "learning_rate": 9.740134312978382e-06, "loss": 0.6898, "step": 2031 }, { "epoch": 0.13, "grad_norm": 1.6816130936179026, "learning_rate": 9.739804389187814e-06, "loss": 0.7213, "step": 2032 }, { "epoch": 0.13, "grad_norm": 1.7146600757685182, "learning_rate": 9.739474261690415e-06, "loss": 0.7286, "step": 2033 }, { "epoch": 0.13, "grad_norm": 1.953013731542068, "learning_rate": 9.73914393050037e-06, "loss": 0.8043, "step": 2034 }, { "epoch": 0.13, "grad_norm": 1.5268686578881405, "learning_rate": 9.73881339563188e-06, "loss": 0.6942, "step": 2035 }, { "epoch": 0.13, "grad_norm": 1.575589600331054, "learning_rate": 9.738482657099144e-06, "loss": 0.7274, "step": 2036 }, { "epoch": 0.13, "grad_norm": 1.6556334810354245, "learning_rate": 9.738151714916382e-06, "loss": 0.6649, "step": 2037 }, { "epoch": 0.13, "grad_norm": 1.3571410574866258, "learning_rate": 9.737820569097815e-06, "loss": 0.7849, "step": 2038 }, { "epoch": 0.13, "grad_norm": 1.4918342035299406, "learning_rate": 9.737489219657676e-06, "loss": 0.7037, "step": 2039 }, { "epoch": 0.13, "grad_norm": 1.6328297332909751, "learning_rate": 9.737157666610204e-06, "loss": 0.6852, "step": 2040 }, { "epoch": 0.13, "grad_norm": 1.5838490708092121, "learning_rate": 9.73682590996965e-06, "loss": 0.7705, "step": 2041 }, { "epoch": 0.13, "grad_norm": 1.634576180339183, "learning_rate": 9.736493949750273e-06, "loss": 0.7761, "step": 2042 }, { "epoch": 0.13, "grad_norm": 1.7057384652462142, "learning_rate": 9.736161785966339e-06, "loss": 0.8296, "step": 2043 }, { "epoch": 0.13, "grad_norm": 1.8807100736180828, "learning_rate": 9.735829418632121e-06, "loss": 0.7841, "step": 2044 }, { "epoch": 0.13, "grad_norm": 1.7091951640627814, "learning_rate": 9.735496847761908e-06, "loss": 0.8633, "step": 2045 }, { "epoch": 0.13, "grad_norm": 1.4963307014554432, "learning_rate": 9.735164073369992e-06, "loss": 0.7282, "step": 2046 }, { "epoch": 0.13, "grad_norm": 1.6900283791847899, "learning_rate": 9.734831095470671e-06, "loss": 0.7662, "step": 2047 }, { "epoch": 0.13, "grad_norm": 1.61246902958708, "learning_rate": 9.734497914078261e-06, "loss": 0.6682, "step": 2048 }, { "epoch": 0.13, "grad_norm": 1.5325458266079774, "learning_rate": 9.73416452920708e-06, "loss": 0.7589, "step": 2049 }, { "epoch": 0.13, "grad_norm": 1.8948417958693637, "learning_rate": 9.733830940871454e-06, "loss": 0.7475, "step": 2050 }, { "epoch": 0.13, "grad_norm": 1.6658535794983085, "learning_rate": 9.73349714908572e-06, "loss": 0.7166, "step": 2051 }, { "epoch": 0.13, "grad_norm": 1.608352219211602, "learning_rate": 9.733163153864227e-06, "loss": 0.7533, "step": 2052 }, { "epoch": 0.13, "grad_norm": 1.7263448255946203, "learning_rate": 9.732828955221328e-06, "loss": 0.7584, "step": 2053 }, { "epoch": 0.13, "grad_norm": 1.5667531709221159, "learning_rate": 9.732494553171384e-06, "loss": 0.7242, "step": 2054 }, { "epoch": 0.13, "grad_norm": 1.632977959235076, "learning_rate": 9.732159947728767e-06, "loss": 0.7469, "step": 2055 }, { "epoch": 0.13, "grad_norm": 1.7706032009503525, "learning_rate": 9.731825138907863e-06, "loss": 0.8239, "step": 2056 }, { "epoch": 0.13, "grad_norm": 1.6718315882939045, "learning_rate": 9.731490126723053e-06, "loss": 0.7704, "step": 2057 }, { "epoch": 0.13, "grad_norm": 1.5048511919529493, "learning_rate": 9.731154911188742e-06, "loss": 0.7177, "step": 2058 }, { "epoch": 0.13, "grad_norm": 1.9499391598871294, "learning_rate": 9.730819492319336e-06, "loss": 0.7371, "step": 2059 }, { "epoch": 0.13, "grad_norm": 1.8147140583958155, "learning_rate": 9.730483870129247e-06, "loss": 0.7439, "step": 2060 }, { "epoch": 0.13, "grad_norm": 1.382070808037671, "learning_rate": 9.730148044632902e-06, "loss": 0.6796, "step": 2061 }, { "epoch": 0.13, "grad_norm": 1.6481547822687383, "learning_rate": 9.729812015844733e-06, "loss": 0.7596, "step": 2062 }, { "epoch": 0.13, "grad_norm": 1.7091166403905291, "learning_rate": 9.729475783779182e-06, "loss": 0.7632, "step": 2063 }, { "epoch": 0.13, "grad_norm": 1.58114096442389, "learning_rate": 9.729139348450701e-06, "loss": 0.8256, "step": 2064 }, { "epoch": 0.13, "grad_norm": 1.3134077869996907, "learning_rate": 9.728802709873747e-06, "loss": 0.6685, "step": 2065 }, { "epoch": 0.13, "grad_norm": 1.5566533399994198, "learning_rate": 9.72846586806279e-06, "loss": 0.8626, "step": 2066 }, { "epoch": 0.13, "grad_norm": 1.6368330770166835, "learning_rate": 9.728128823032305e-06, "loss": 0.7527, "step": 2067 }, { "epoch": 0.13, "grad_norm": 1.6078552590304898, "learning_rate": 9.727791574796779e-06, "loss": 0.6841, "step": 2068 }, { "epoch": 0.13, "grad_norm": 1.7551346893284956, "learning_rate": 9.727454123370705e-06, "loss": 0.7591, "step": 2069 }, { "epoch": 0.13, "grad_norm": 1.8079612279465291, "learning_rate": 9.727116468768586e-06, "loss": 0.786, "step": 2070 }, { "epoch": 0.13, "grad_norm": 1.732136734916048, "learning_rate": 9.726778611004936e-06, "loss": 0.7998, "step": 2071 }, { "epoch": 0.13, "grad_norm": 1.9178398805887729, "learning_rate": 9.726440550094272e-06, "loss": 0.7689, "step": 2072 }, { "epoch": 0.13, "grad_norm": 1.165771256155278, "learning_rate": 9.726102286051126e-06, "loss": 0.5964, "step": 2073 }, { "epoch": 0.13, "grad_norm": 1.5308728062711983, "learning_rate": 9.725763818890035e-06, "loss": 0.782, "step": 2074 }, { "epoch": 0.13, "grad_norm": 1.1200761778682469, "learning_rate": 9.725425148625544e-06, "loss": 0.6132, "step": 2075 }, { "epoch": 0.13, "grad_norm": 1.5475571806114583, "learning_rate": 9.725086275272209e-06, "loss": 0.7907, "step": 2076 }, { "epoch": 0.13, "grad_norm": 2.008895289552839, "learning_rate": 9.724747198844597e-06, "loss": 0.7736, "step": 2077 }, { "epoch": 0.13, "grad_norm": 1.1102045548710102, "learning_rate": 9.724407919357276e-06, "loss": 0.6746, "step": 2078 }, { "epoch": 0.13, "grad_norm": 1.8423878867452212, "learning_rate": 9.724068436824831e-06, "loss": 0.7394, "step": 2079 }, { "epoch": 0.13, "grad_norm": 1.8159316805397805, "learning_rate": 9.723728751261852e-06, "loss": 0.7852, "step": 2080 }, { "epoch": 0.13, "grad_norm": 1.4530520827970164, "learning_rate": 9.723388862682936e-06, "loss": 0.8246, "step": 2081 }, { "epoch": 0.13, "grad_norm": 1.7799194525612443, "learning_rate": 9.723048771102692e-06, "loss": 0.7783, "step": 2082 }, { "epoch": 0.13, "grad_norm": 1.218465144972803, "learning_rate": 9.722708476535736e-06, "loss": 0.633, "step": 2083 }, { "epoch": 0.13, "grad_norm": 1.5593449107367405, "learning_rate": 9.722367978996694e-06, "loss": 0.773, "step": 2084 }, { "epoch": 0.13, "grad_norm": 1.5443605799316944, "learning_rate": 9.722027278500201e-06, "loss": 0.8033, "step": 2085 }, { "epoch": 0.13, "grad_norm": 1.5908862511054747, "learning_rate": 9.721686375060897e-06, "loss": 0.7802, "step": 2086 }, { "epoch": 0.13, "grad_norm": 1.8662032274730924, "learning_rate": 9.721345268693433e-06, "loss": 0.7312, "step": 2087 }, { "epoch": 0.13, "grad_norm": 1.3270551682674874, "learning_rate": 9.72100395941247e-06, "loss": 0.6827, "step": 2088 }, { "epoch": 0.13, "grad_norm": 1.4530117304287011, "learning_rate": 9.720662447232679e-06, "loss": 0.6751, "step": 2089 }, { "epoch": 0.13, "grad_norm": 1.505315985656517, "learning_rate": 9.720320732168733e-06, "loss": 0.727, "step": 2090 }, { "epoch": 0.13, "grad_norm": 1.5616740120948411, "learning_rate": 9.719978814235323e-06, "loss": 0.7572, "step": 2091 }, { "epoch": 0.13, "grad_norm": 1.6499443640318943, "learning_rate": 9.71963669344714e-06, "loss": 0.7721, "step": 2092 }, { "epoch": 0.13, "grad_norm": 1.525153776077733, "learning_rate": 9.71929436981889e-06, "loss": 0.7415, "step": 2093 }, { "epoch": 0.13, "grad_norm": 1.6772136652233214, "learning_rate": 9.718951843365285e-06, "loss": 0.9157, "step": 2094 }, { "epoch": 0.13, "grad_norm": 1.6794437212831967, "learning_rate": 9.718609114101045e-06, "loss": 0.8121, "step": 2095 }, { "epoch": 0.13, "grad_norm": 2.018985220658002, "learning_rate": 9.7182661820409e-06, "loss": 0.8029, "step": 2096 }, { "epoch": 0.13, "grad_norm": 1.5354258183726448, "learning_rate": 9.717923047199591e-06, "loss": 0.8328, "step": 2097 }, { "epoch": 0.13, "grad_norm": 1.6902008952754142, "learning_rate": 9.71757970959186e-06, "loss": 0.7426, "step": 2098 }, { "epoch": 0.13, "grad_norm": 1.5714040876458844, "learning_rate": 9.71723616923247e-06, "loss": 0.7595, "step": 2099 }, { "epoch": 0.13, "grad_norm": 1.5967080516167091, "learning_rate": 9.716892426136179e-06, "loss": 0.7049, "step": 2100 }, { "epoch": 0.13, "grad_norm": 1.4103068572678414, "learning_rate": 9.716548480317763e-06, "loss": 0.7912, "step": 2101 }, { "epoch": 0.13, "grad_norm": 1.7067260442969754, "learning_rate": 9.716204331792005e-06, "loss": 0.8236, "step": 2102 }, { "epoch": 0.13, "grad_norm": 1.6158640696098945, "learning_rate": 9.715859980573694e-06, "loss": 0.7406, "step": 2103 }, { "epoch": 0.13, "grad_norm": 1.5094072638235339, "learning_rate": 9.715515426677633e-06, "loss": 0.7924, "step": 2104 }, { "epoch": 0.13, "grad_norm": 1.9049690078207482, "learning_rate": 9.715170670118625e-06, "loss": 0.7526, "step": 2105 }, { "epoch": 0.13, "grad_norm": 4.067394477973532, "learning_rate": 9.714825710911489e-06, "loss": 0.8034, "step": 2106 }, { "epoch": 0.13, "grad_norm": 1.999260243902144, "learning_rate": 9.714480549071053e-06, "loss": 0.7012, "step": 2107 }, { "epoch": 0.13, "grad_norm": 1.518626782195969, "learning_rate": 9.714135184612149e-06, "loss": 0.8108, "step": 2108 }, { "epoch": 0.13, "grad_norm": 2.1992705501629803, "learning_rate": 9.713789617549621e-06, "loss": 0.8044, "step": 2109 }, { "epoch": 0.14, "grad_norm": 1.7205873728431318, "learning_rate": 9.71344384789832e-06, "loss": 0.6997, "step": 2110 }, { "epoch": 0.14, "grad_norm": 1.6051306808820924, "learning_rate": 9.713097875673105e-06, "loss": 0.774, "step": 2111 }, { "epoch": 0.14, "grad_norm": 1.8210114218570685, "learning_rate": 9.71275170088885e-06, "loss": 0.807, "step": 2112 }, { "epoch": 0.14, "grad_norm": 1.4680101462194075, "learning_rate": 9.712405323560427e-06, "loss": 0.679, "step": 2113 }, { "epoch": 0.14, "grad_norm": 1.6973705983502383, "learning_rate": 9.712058743702727e-06, "loss": 0.7649, "step": 2114 }, { "epoch": 0.14, "grad_norm": 1.6482981649117074, "learning_rate": 9.711711961330644e-06, "loss": 0.723, "step": 2115 }, { "epoch": 0.14, "grad_norm": 1.1039366357756466, "learning_rate": 9.711364976459078e-06, "loss": 0.6473, "step": 2116 }, { "epoch": 0.14, "grad_norm": 1.7213884422890766, "learning_rate": 9.711017789102948e-06, "loss": 0.6519, "step": 2117 }, { "epoch": 0.14, "grad_norm": 1.7494790636197484, "learning_rate": 9.710670399277174e-06, "loss": 0.728, "step": 2118 }, { "epoch": 0.14, "grad_norm": 1.972062098820538, "learning_rate": 9.710322806996682e-06, "loss": 0.7544, "step": 2119 }, { "epoch": 0.14, "grad_norm": 1.8193880090199404, "learning_rate": 9.709975012276416e-06, "loss": 0.8162, "step": 2120 }, { "epoch": 0.14, "grad_norm": 1.5406760467828031, "learning_rate": 9.70962701513132e-06, "loss": 0.769, "step": 2121 }, { "epoch": 0.14, "grad_norm": 1.6107444158486108, "learning_rate": 9.709278815576351e-06, "loss": 0.7532, "step": 2122 }, { "epoch": 0.14, "grad_norm": 1.6120500998710958, "learning_rate": 9.708930413626473e-06, "loss": 0.7147, "step": 2123 }, { "epoch": 0.14, "grad_norm": 1.603952858597939, "learning_rate": 9.708581809296662e-06, "loss": 0.6999, "step": 2124 }, { "epoch": 0.14, "grad_norm": 1.4729788501642844, "learning_rate": 9.708233002601897e-06, "loss": 0.6831, "step": 2125 }, { "epoch": 0.14, "grad_norm": 1.7210723493005597, "learning_rate": 9.707883993557173e-06, "loss": 0.8407, "step": 2126 }, { "epoch": 0.14, "grad_norm": 1.7611893614604843, "learning_rate": 9.707534782177487e-06, "loss": 0.8262, "step": 2127 }, { "epoch": 0.14, "grad_norm": 1.6584313097453907, "learning_rate": 9.707185368477848e-06, "loss": 0.7335, "step": 2128 }, { "epoch": 0.14, "grad_norm": 1.6693190134888576, "learning_rate": 9.706835752473273e-06, "loss": 0.8487, "step": 2129 }, { "epoch": 0.14, "grad_norm": 2.4390215083660136, "learning_rate": 9.706485934178788e-06, "loss": 0.7416, "step": 2130 }, { "epoch": 0.14, "grad_norm": 1.289104803091416, "learning_rate": 9.706135913609426e-06, "loss": 0.6462, "step": 2131 }, { "epoch": 0.14, "grad_norm": 1.599536855128545, "learning_rate": 9.705785690780234e-06, "loss": 0.7995, "step": 2132 }, { "epoch": 0.14, "grad_norm": 1.902617416729234, "learning_rate": 9.70543526570626e-06, "loss": 0.8187, "step": 2133 }, { "epoch": 0.14, "grad_norm": 1.800021487240329, "learning_rate": 9.705084638402565e-06, "loss": 0.7326, "step": 2134 }, { "epoch": 0.14, "grad_norm": 1.5964148288383893, "learning_rate": 9.704733808884219e-06, "loss": 0.7037, "step": 2135 }, { "epoch": 0.14, "grad_norm": 1.5130345287530371, "learning_rate": 9.7043827771663e-06, "loss": 0.7444, "step": 2136 }, { "epoch": 0.14, "grad_norm": 1.8919179458261632, "learning_rate": 9.704031543263893e-06, "loss": 0.7758, "step": 2137 }, { "epoch": 0.14, "grad_norm": 1.5361031705471708, "learning_rate": 9.703680107192098e-06, "loss": 0.7833, "step": 2138 }, { "epoch": 0.14, "grad_norm": 1.8805710246496123, "learning_rate": 9.703328468966016e-06, "loss": 0.7243, "step": 2139 }, { "epoch": 0.14, "grad_norm": 1.758909999988029, "learning_rate": 9.702976628600756e-06, "loss": 0.6884, "step": 2140 }, { "epoch": 0.14, "grad_norm": 1.8033337640015805, "learning_rate": 9.702624586111446e-06, "loss": 0.7843, "step": 2141 }, { "epoch": 0.14, "grad_norm": 1.7589527975999653, "learning_rate": 9.70227234151321e-06, "loss": 0.7814, "step": 2142 }, { "epoch": 0.14, "grad_norm": 1.5254735301526907, "learning_rate": 9.701919894821192e-06, "loss": 0.7559, "step": 2143 }, { "epoch": 0.14, "grad_norm": 1.3222234970557145, "learning_rate": 9.701567246050537e-06, "loss": 0.7443, "step": 2144 }, { "epoch": 0.14, "grad_norm": 1.9055602821491273, "learning_rate": 9.7012143952164e-06, "loss": 0.806, "step": 2145 }, { "epoch": 0.14, "grad_norm": 1.4349319268913414, "learning_rate": 9.700861342333946e-06, "loss": 0.721, "step": 2146 }, { "epoch": 0.14, "grad_norm": 2.462020265174995, "learning_rate": 9.70050808741835e-06, "loss": 0.8516, "step": 2147 }, { "epoch": 0.14, "grad_norm": 1.6966449490505986, "learning_rate": 9.700154630484795e-06, "loss": 0.7308, "step": 2148 }, { "epoch": 0.14, "grad_norm": 1.8734278538332712, "learning_rate": 9.69980097154847e-06, "loss": 0.8276, "step": 2149 }, { "epoch": 0.14, "grad_norm": 1.538283439939932, "learning_rate": 9.699447110624574e-06, "loss": 0.7479, "step": 2150 }, { "epoch": 0.14, "grad_norm": 1.6092395971484605, "learning_rate": 9.699093047728317e-06, "loss": 0.779, "step": 2151 }, { "epoch": 0.14, "grad_norm": 2.1324179496096303, "learning_rate": 9.698738782874914e-06, "loss": 0.7258, "step": 2152 }, { "epoch": 0.14, "grad_norm": 1.8890904939949218, "learning_rate": 9.698384316079592e-06, "loss": 0.7256, "step": 2153 }, { "epoch": 0.14, "grad_norm": 1.464709926932128, "learning_rate": 9.698029647357585e-06, "loss": 0.7953, "step": 2154 }, { "epoch": 0.14, "grad_norm": 1.1988089198439356, "learning_rate": 9.697674776724135e-06, "loss": 0.6217, "step": 2155 }, { "epoch": 0.14, "grad_norm": 1.6374335900226284, "learning_rate": 9.697319704194495e-06, "loss": 0.646, "step": 2156 }, { "epoch": 0.14, "grad_norm": 1.8762006017673354, "learning_rate": 9.696964429783923e-06, "loss": 0.771, "step": 2157 }, { "epoch": 0.14, "grad_norm": 1.600973185772352, "learning_rate": 9.69660895350769e-06, "loss": 0.8357, "step": 2158 }, { "epoch": 0.14, "grad_norm": 1.5129569813776622, "learning_rate": 9.696253275381074e-06, "loss": 0.704, "step": 2159 }, { "epoch": 0.14, "grad_norm": 1.5362707068073882, "learning_rate": 9.69589739541936e-06, "loss": 0.7501, "step": 2160 }, { "epoch": 0.14, "grad_norm": 1.8744922617893238, "learning_rate": 9.695541313637845e-06, "loss": 0.8174, "step": 2161 }, { "epoch": 0.14, "grad_norm": 2.3689792696372107, "learning_rate": 9.695185030051828e-06, "loss": 0.7822, "step": 2162 }, { "epoch": 0.14, "grad_norm": 1.5707486342470458, "learning_rate": 9.694828544676626e-06, "loss": 0.8209, "step": 2163 }, { "epoch": 0.14, "grad_norm": 1.1846719629671398, "learning_rate": 9.69447185752756e-06, "loss": 0.6206, "step": 2164 }, { "epoch": 0.14, "grad_norm": 1.247931383280134, "learning_rate": 9.694114968619955e-06, "loss": 0.7063, "step": 2165 }, { "epoch": 0.14, "grad_norm": 1.6632786344398849, "learning_rate": 9.693757877969155e-06, "loss": 0.7898, "step": 2166 }, { "epoch": 0.14, "grad_norm": 1.6240959093482417, "learning_rate": 9.693400585590502e-06, "loss": 0.8593, "step": 2167 }, { "epoch": 0.14, "grad_norm": 1.5605536065648413, "learning_rate": 9.693043091499355e-06, "loss": 0.7438, "step": 2168 }, { "epoch": 0.14, "grad_norm": 1.796653219225, "learning_rate": 9.692685395711077e-06, "loss": 0.8164, "step": 2169 }, { "epoch": 0.14, "grad_norm": 1.3771227991493393, "learning_rate": 9.692327498241042e-06, "loss": 0.6505, "step": 2170 }, { "epoch": 0.14, "grad_norm": 2.381175623614993, "learning_rate": 9.69196939910463e-06, "loss": 0.8075, "step": 2171 }, { "epoch": 0.14, "grad_norm": 1.6049393902055025, "learning_rate": 9.691611098317234e-06, "loss": 0.753, "step": 2172 }, { "epoch": 0.14, "grad_norm": 1.664994798989877, "learning_rate": 9.69125259589425e-06, "loss": 0.7351, "step": 2173 }, { "epoch": 0.14, "grad_norm": 1.7754207118885148, "learning_rate": 9.690893891851088e-06, "loss": 0.8344, "step": 2174 }, { "epoch": 0.14, "grad_norm": 1.473299962635701, "learning_rate": 9.690534986203164e-06, "loss": 0.8691, "step": 2175 }, { "epoch": 0.14, "grad_norm": 1.1401116135076725, "learning_rate": 9.690175878965902e-06, "loss": 0.6215, "step": 2176 }, { "epoch": 0.14, "grad_norm": 1.5984048582438954, "learning_rate": 9.689816570154735e-06, "loss": 0.7585, "step": 2177 }, { "epoch": 0.14, "grad_norm": 2.022245719532886, "learning_rate": 9.689457059785108e-06, "loss": 0.6638, "step": 2178 }, { "epoch": 0.14, "grad_norm": 1.5434872341961476, "learning_rate": 9.68909734787247e-06, "loss": 0.7122, "step": 2179 }, { "epoch": 0.14, "grad_norm": 1.4217971536324114, "learning_rate": 9.688737434432281e-06, "loss": 0.7615, "step": 2180 }, { "epoch": 0.14, "grad_norm": 1.5601021847836414, "learning_rate": 9.688377319480008e-06, "loss": 0.6614, "step": 2181 }, { "epoch": 0.14, "grad_norm": 1.6246764448533064, "learning_rate": 9.688017003031132e-06, "loss": 0.8515, "step": 2182 }, { "epoch": 0.14, "grad_norm": 1.7308767361344146, "learning_rate": 9.687656485101134e-06, "loss": 0.7579, "step": 2183 }, { "epoch": 0.14, "grad_norm": 1.6319798545609934, "learning_rate": 9.687295765705512e-06, "loss": 0.7518, "step": 2184 }, { "epoch": 0.14, "grad_norm": 1.6878982291403033, "learning_rate": 9.686934844859766e-06, "loss": 0.7056, "step": 2185 }, { "epoch": 0.14, "grad_norm": 1.5455778537507134, "learning_rate": 9.68657372257941e-06, "loss": 0.7346, "step": 2186 }, { "epoch": 0.14, "grad_norm": 1.5245111720880304, "learning_rate": 9.686212398879963e-06, "loss": 0.815, "step": 2187 }, { "epoch": 0.14, "grad_norm": 1.0633139383750583, "learning_rate": 9.685850873776954e-06, "loss": 0.5967, "step": 2188 }, { "epoch": 0.14, "grad_norm": 1.4435873994655273, "learning_rate": 9.68548914728592e-06, "loss": 0.8142, "step": 2189 }, { "epoch": 0.14, "grad_norm": 1.5335934306410364, "learning_rate": 9.68512721942241e-06, "loss": 0.8197, "step": 2190 }, { "epoch": 0.14, "grad_norm": 1.9416709405473145, "learning_rate": 9.684765090201975e-06, "loss": 0.7539, "step": 2191 }, { "epoch": 0.14, "grad_norm": 2.335845228020848, "learning_rate": 9.684402759640181e-06, "loss": 0.7599, "step": 2192 }, { "epoch": 0.14, "grad_norm": 1.5564426177318724, "learning_rate": 9.684040227752601e-06, "loss": 0.7351, "step": 2193 }, { "epoch": 0.14, "grad_norm": 1.5995331188901911, "learning_rate": 9.683677494554813e-06, "loss": 0.6908, "step": 2194 }, { "epoch": 0.14, "grad_norm": 1.6858095849819663, "learning_rate": 9.683314560062409e-06, "loss": 0.7212, "step": 2195 }, { "epoch": 0.14, "grad_norm": 1.5854328804312299, "learning_rate": 9.682951424290985e-06, "loss": 0.836, "step": 2196 }, { "epoch": 0.14, "grad_norm": 2.8771725453782717, "learning_rate": 9.68258808725615e-06, "loss": 0.8072, "step": 2197 }, { "epoch": 0.14, "grad_norm": 1.58639020881444, "learning_rate": 9.682224548973518e-06, "loss": 0.8066, "step": 2198 }, { "epoch": 0.14, "grad_norm": 1.5960534327156313, "learning_rate": 9.681860809458713e-06, "loss": 0.8041, "step": 2199 }, { "epoch": 0.14, "grad_norm": 1.636866015752925, "learning_rate": 9.68149686872737e-06, "loss": 0.6927, "step": 2200 }, { "epoch": 0.14, "grad_norm": 1.5458597109821213, "learning_rate": 9.681132726795128e-06, "loss": 0.742, "step": 2201 }, { "epoch": 0.14, "grad_norm": 1.3764941801918904, "learning_rate": 9.680768383677637e-06, "loss": 0.7182, "step": 2202 }, { "epoch": 0.14, "grad_norm": 2.5523679942528883, "learning_rate": 9.680403839390558e-06, "loss": 0.8009, "step": 2203 }, { "epoch": 0.14, "grad_norm": 1.6480069267012125, "learning_rate": 9.680039093949556e-06, "loss": 0.7982, "step": 2204 }, { "epoch": 0.14, "grad_norm": 1.8671959272380207, "learning_rate": 9.679674147370308e-06, "loss": 0.7365, "step": 2205 }, { "epoch": 0.14, "grad_norm": 1.8777364275350175, "learning_rate": 9.679308999668499e-06, "loss": 0.7716, "step": 2206 }, { "epoch": 0.14, "grad_norm": 1.6826462118809986, "learning_rate": 9.67894365085982e-06, "loss": 0.7664, "step": 2207 }, { "epoch": 0.14, "grad_norm": 1.4372271891582453, "learning_rate": 9.678578100959977e-06, "loss": 0.7688, "step": 2208 }, { "epoch": 0.14, "grad_norm": 1.611905708297929, "learning_rate": 9.678212349984677e-06, "loss": 0.7408, "step": 2209 }, { "epoch": 0.14, "grad_norm": 1.8762472304233984, "learning_rate": 9.677846397949641e-06, "loss": 0.7724, "step": 2210 }, { "epoch": 0.14, "grad_norm": 1.8854679553610565, "learning_rate": 9.677480244870597e-06, "loss": 0.7963, "step": 2211 }, { "epoch": 0.14, "grad_norm": 1.4542756564514556, "learning_rate": 9.67711389076328e-06, "loss": 0.6525, "step": 2212 }, { "epoch": 0.14, "grad_norm": 1.6889732564612014, "learning_rate": 9.676747335643435e-06, "loss": 0.7043, "step": 2213 }, { "epoch": 0.14, "grad_norm": 1.0894494575837874, "learning_rate": 9.676380579526817e-06, "loss": 0.6214, "step": 2214 }, { "epoch": 0.14, "grad_norm": 1.5814295997960457, "learning_rate": 9.676013622429187e-06, "loss": 0.6203, "step": 2215 }, { "epoch": 0.14, "grad_norm": 1.8815926046460645, "learning_rate": 9.67564646436632e-06, "loss": 0.7607, "step": 2216 }, { "epoch": 0.14, "grad_norm": 1.5446911970053399, "learning_rate": 9.675279105353991e-06, "loss": 0.7492, "step": 2217 }, { "epoch": 0.14, "grad_norm": 1.597125024613984, "learning_rate": 9.674911545407992e-06, "loss": 0.7908, "step": 2218 }, { "epoch": 0.14, "grad_norm": 1.6295977645110857, "learning_rate": 9.674543784544118e-06, "loss": 0.7667, "step": 2219 }, { "epoch": 0.14, "grad_norm": 2.021445444167082, "learning_rate": 9.674175822778172e-06, "loss": 0.7563, "step": 2220 }, { "epoch": 0.14, "grad_norm": 1.7758797745459696, "learning_rate": 9.673807660125974e-06, "loss": 0.7927, "step": 2221 }, { "epoch": 0.14, "grad_norm": 1.4353265478708768, "learning_rate": 9.673439296603341e-06, "loss": 0.7519, "step": 2222 }, { "epoch": 0.14, "grad_norm": 1.603886594633305, "learning_rate": 9.673070732226109e-06, "loss": 0.73, "step": 2223 }, { "epoch": 0.14, "grad_norm": 1.7022977883479198, "learning_rate": 9.672701967010117e-06, "loss": 0.7007, "step": 2224 }, { "epoch": 0.14, "grad_norm": 1.556215676396692, "learning_rate": 9.67233300097121e-06, "loss": 0.7248, "step": 2225 }, { "epoch": 0.14, "grad_norm": 1.7437855940818865, "learning_rate": 9.671963834125251e-06, "loss": 0.7567, "step": 2226 }, { "epoch": 0.14, "grad_norm": 1.9753586693341378, "learning_rate": 9.671594466488104e-06, "loss": 0.797, "step": 2227 }, { "epoch": 0.14, "grad_norm": 1.7043165820079167, "learning_rate": 9.671224898075643e-06, "loss": 0.6889, "step": 2228 }, { "epoch": 0.14, "grad_norm": 1.648305819159604, "learning_rate": 9.670855128903752e-06, "loss": 0.6851, "step": 2229 }, { "epoch": 0.14, "grad_norm": 1.7250720851895907, "learning_rate": 9.67048515898832e-06, "loss": 0.6999, "step": 2230 }, { "epoch": 0.14, "grad_norm": 1.6397139956991895, "learning_rate": 9.670114988345252e-06, "loss": 0.7689, "step": 2231 }, { "epoch": 0.14, "grad_norm": 1.7562199915176278, "learning_rate": 9.669744616990454e-06, "loss": 0.7107, "step": 2232 }, { "epoch": 0.14, "grad_norm": 1.697570006826142, "learning_rate": 9.669374044939846e-06, "loss": 0.7175, "step": 2233 }, { "epoch": 0.14, "grad_norm": 1.593390413533863, "learning_rate": 9.669003272209352e-06, "loss": 0.7692, "step": 2234 }, { "epoch": 0.14, "grad_norm": 1.4884153451413296, "learning_rate": 9.668632298814908e-06, "loss": 0.7624, "step": 2235 }, { "epoch": 0.14, "grad_norm": 1.4947130620745057, "learning_rate": 9.66826112477246e-06, "loss": 0.7476, "step": 2236 }, { "epoch": 0.14, "grad_norm": 1.734137685444167, "learning_rate": 9.667889750097956e-06, "loss": 0.6897, "step": 2237 }, { "epoch": 0.14, "grad_norm": 1.5714600239075909, "learning_rate": 9.66751817480736e-06, "loss": 0.8004, "step": 2238 }, { "epoch": 0.14, "grad_norm": 1.628831346332239, "learning_rate": 9.66714639891664e-06, "loss": 0.7517, "step": 2239 }, { "epoch": 0.14, "grad_norm": 2.8943646394591904, "learning_rate": 9.666774422441776e-06, "loss": 0.7288, "step": 2240 }, { "epoch": 0.14, "grad_norm": 1.8582030173046473, "learning_rate": 9.666402245398753e-06, "loss": 0.7779, "step": 2241 }, { "epoch": 0.14, "grad_norm": 1.651439647887928, "learning_rate": 9.666029867803569e-06, "loss": 0.7314, "step": 2242 }, { "epoch": 0.14, "grad_norm": 1.7687119762217967, "learning_rate": 9.665657289672222e-06, "loss": 0.7958, "step": 2243 }, { "epoch": 0.14, "grad_norm": 1.5869150316377982, "learning_rate": 9.665284511020732e-06, "loss": 0.664, "step": 2244 }, { "epoch": 0.14, "grad_norm": 2.0617508624730325, "learning_rate": 9.664911531865115e-06, "loss": 0.6885, "step": 2245 }, { "epoch": 0.14, "grad_norm": 1.547019321195309, "learning_rate": 9.664538352221401e-06, "loss": 0.7393, "step": 2246 }, { "epoch": 0.14, "grad_norm": 1.7079437091549778, "learning_rate": 9.664164972105634e-06, "loss": 0.7518, "step": 2247 }, { "epoch": 0.14, "grad_norm": 1.9910177160614118, "learning_rate": 9.663791391533856e-06, "loss": 0.7019, "step": 2248 }, { "epoch": 0.14, "grad_norm": 1.4972447354106677, "learning_rate": 9.663417610522124e-06, "loss": 0.743, "step": 2249 }, { "epoch": 0.14, "grad_norm": 1.553121905216919, "learning_rate": 9.663043629086501e-06, "loss": 0.7441, "step": 2250 }, { "epoch": 0.14, "grad_norm": 1.2628003653672721, "learning_rate": 9.66266944724306e-06, "loss": 0.7369, "step": 2251 }, { "epoch": 0.14, "grad_norm": 1.9838599535877959, "learning_rate": 9.662295065007887e-06, "loss": 0.7058, "step": 2252 }, { "epoch": 0.14, "grad_norm": 1.6855297625526169, "learning_rate": 9.661920482397069e-06, "loss": 0.7442, "step": 2253 }, { "epoch": 0.14, "grad_norm": 1.5805022689112016, "learning_rate": 9.661545699426703e-06, "loss": 0.7075, "step": 2254 }, { "epoch": 0.14, "grad_norm": 1.952643092205762, "learning_rate": 9.661170716112897e-06, "loss": 0.7592, "step": 2255 }, { "epoch": 0.14, "grad_norm": 1.4035718186238426, "learning_rate": 9.66079553247177e-06, "loss": 0.7131, "step": 2256 }, { "epoch": 0.14, "grad_norm": 1.9039574574384954, "learning_rate": 9.660420148519444e-06, "loss": 0.6947, "step": 2257 }, { "epoch": 0.14, "grad_norm": 1.4261183195315792, "learning_rate": 9.660044564272054e-06, "loss": 0.662, "step": 2258 }, { "epoch": 0.14, "grad_norm": 1.5398238649971094, "learning_rate": 9.65966877974574e-06, "loss": 0.7488, "step": 2259 }, { "epoch": 0.14, "grad_norm": 1.5402135689405232, "learning_rate": 9.659292794956652e-06, "loss": 0.8411, "step": 2260 }, { "epoch": 0.14, "grad_norm": 1.4604571054863553, "learning_rate": 9.658916609920951e-06, "loss": 0.7369, "step": 2261 }, { "epoch": 0.14, "grad_norm": 1.7069493037231782, "learning_rate": 9.658540224654805e-06, "loss": 0.8566, "step": 2262 }, { "epoch": 0.14, "grad_norm": 1.3023485124249514, "learning_rate": 9.65816363917439e-06, "loss": 0.6941, "step": 2263 }, { "epoch": 0.14, "grad_norm": 1.6794047805526484, "learning_rate": 9.657786853495888e-06, "loss": 0.7329, "step": 2264 }, { "epoch": 0.14, "grad_norm": 1.61214251905636, "learning_rate": 9.657409867635494e-06, "loss": 0.764, "step": 2265 }, { "epoch": 0.15, "grad_norm": 1.6053130885466276, "learning_rate": 9.65703268160941e-06, "loss": 0.7748, "step": 2266 }, { "epoch": 0.15, "grad_norm": 1.7356640764703914, "learning_rate": 9.656655295433849e-06, "loss": 0.8055, "step": 2267 }, { "epoch": 0.15, "grad_norm": 1.1542878683073567, "learning_rate": 9.656277709125028e-06, "loss": 0.7026, "step": 2268 }, { "epoch": 0.15, "grad_norm": 1.4566004604350702, "learning_rate": 9.655899922699174e-06, "loss": 0.679, "step": 2269 }, { "epoch": 0.15, "grad_norm": 1.8145151692173744, "learning_rate": 9.655521936172525e-06, "loss": 0.7124, "step": 2270 }, { "epoch": 0.15, "grad_norm": 1.8516676599127795, "learning_rate": 9.655143749561326e-06, "loss": 0.7642, "step": 2271 }, { "epoch": 0.15, "grad_norm": 1.7504024925997272, "learning_rate": 9.65476536288183e-06, "loss": 0.7545, "step": 2272 }, { "epoch": 0.15, "grad_norm": 1.5342478827122223, "learning_rate": 9.654386776150301e-06, "loss": 0.6788, "step": 2273 }, { "epoch": 0.15, "grad_norm": 1.1320832065248434, "learning_rate": 9.654007989383009e-06, "loss": 0.7281, "step": 2274 }, { "epoch": 0.15, "grad_norm": 1.5446990161033318, "learning_rate": 9.653629002596232e-06, "loss": 0.7133, "step": 2275 }, { "epoch": 0.15, "grad_norm": 1.8454889171725306, "learning_rate": 9.653249815806258e-06, "loss": 0.704, "step": 2276 }, { "epoch": 0.15, "grad_norm": 2.4254333634392395, "learning_rate": 9.652870429029386e-06, "loss": 0.7695, "step": 2277 }, { "epoch": 0.15, "grad_norm": 1.5441454851501848, "learning_rate": 9.652490842281921e-06, "loss": 0.7048, "step": 2278 }, { "epoch": 0.15, "grad_norm": 1.5789924037244079, "learning_rate": 9.652111055580175e-06, "loss": 0.7594, "step": 2279 }, { "epoch": 0.15, "grad_norm": 1.6740790409005457, "learning_rate": 9.651731068940472e-06, "loss": 0.7253, "step": 2280 }, { "epoch": 0.15, "grad_norm": 1.667936647708894, "learning_rate": 9.65135088237914e-06, "loss": 0.7766, "step": 2281 }, { "epoch": 0.15, "grad_norm": 1.8590618007234423, "learning_rate": 9.650970495912521e-06, "loss": 0.7521, "step": 2282 }, { "epoch": 0.15, "grad_norm": 1.567314938540432, "learning_rate": 9.650589909556964e-06, "loss": 0.6791, "step": 2283 }, { "epoch": 0.15, "grad_norm": 1.5281131018668426, "learning_rate": 9.650209123328826e-06, "loss": 0.7616, "step": 2284 }, { "epoch": 0.15, "grad_norm": 1.8182469473874048, "learning_rate": 9.649828137244471e-06, "loss": 0.7175, "step": 2285 }, { "epoch": 0.15, "grad_norm": 1.5419448488527467, "learning_rate": 9.649446951320273e-06, "loss": 0.7751, "step": 2286 }, { "epoch": 0.15, "grad_norm": 1.5606837484107525, "learning_rate": 9.649065565572615e-06, "loss": 0.8035, "step": 2287 }, { "epoch": 0.15, "grad_norm": 1.2064396379786746, "learning_rate": 9.648683980017887e-06, "loss": 0.5846, "step": 2288 }, { "epoch": 0.15, "grad_norm": 1.6444006304515792, "learning_rate": 9.64830219467249e-06, "loss": 0.8106, "step": 2289 }, { "epoch": 0.15, "grad_norm": 1.8509454818844153, "learning_rate": 9.647920209552832e-06, "loss": 0.7971, "step": 2290 }, { "epoch": 0.15, "grad_norm": 1.5409832766864857, "learning_rate": 9.647538024675331e-06, "loss": 0.743, "step": 2291 }, { "epoch": 0.15, "grad_norm": 1.6424869983170485, "learning_rate": 9.647155640056411e-06, "loss": 0.7846, "step": 2292 }, { "epoch": 0.15, "grad_norm": 1.947636846650218, "learning_rate": 9.646773055712508e-06, "loss": 0.7454, "step": 2293 }, { "epoch": 0.15, "grad_norm": 1.558308212578467, "learning_rate": 9.646390271660062e-06, "loss": 0.7196, "step": 2294 }, { "epoch": 0.15, "grad_norm": 1.5436000641035068, "learning_rate": 9.646007287915524e-06, "loss": 0.8183, "step": 2295 }, { "epoch": 0.15, "grad_norm": 3.3916608332502762, "learning_rate": 9.645624104495358e-06, "loss": 0.741, "step": 2296 }, { "epoch": 0.15, "grad_norm": 1.2634923671444493, "learning_rate": 9.64524072141603e-06, "loss": 0.7746, "step": 2297 }, { "epoch": 0.15, "grad_norm": 1.6517026587783272, "learning_rate": 9.644857138694016e-06, "loss": 0.688, "step": 2298 }, { "epoch": 0.15, "grad_norm": 1.8354175588923411, "learning_rate": 9.6444733563458e-06, "loss": 0.8255, "step": 2299 }, { "epoch": 0.15, "grad_norm": 1.6710506356835477, "learning_rate": 9.644089374387881e-06, "loss": 0.7515, "step": 2300 }, { "epoch": 0.15, "grad_norm": 1.6081735641270551, "learning_rate": 9.643705192836758e-06, "loss": 0.726, "step": 2301 }, { "epoch": 0.15, "grad_norm": 1.4638396818618704, "learning_rate": 9.643320811708944e-06, "loss": 0.7429, "step": 2302 }, { "epoch": 0.15, "grad_norm": 1.4500091457827406, "learning_rate": 9.64293623102096e-06, "loss": 0.769, "step": 2303 }, { "epoch": 0.15, "grad_norm": 1.598802682509271, "learning_rate": 9.642551450789331e-06, "loss": 0.7933, "step": 2304 }, { "epoch": 0.15, "grad_norm": 1.9225014526426432, "learning_rate": 9.642166471030596e-06, "loss": 0.7507, "step": 2305 }, { "epoch": 0.15, "grad_norm": 1.7740618405217439, "learning_rate": 9.641781291761301e-06, "loss": 0.8453, "step": 2306 }, { "epoch": 0.15, "grad_norm": 1.8206305896930184, "learning_rate": 9.641395912998e-06, "loss": 0.7118, "step": 2307 }, { "epoch": 0.15, "grad_norm": 1.5577588592515088, "learning_rate": 9.641010334757255e-06, "loss": 0.7259, "step": 2308 }, { "epoch": 0.15, "grad_norm": 1.873754664745245, "learning_rate": 9.64062455705564e-06, "loss": 0.7953, "step": 2309 }, { "epoch": 0.15, "grad_norm": 1.8403217211204872, "learning_rate": 9.64023857990973e-06, "loss": 0.7548, "step": 2310 }, { "epoch": 0.15, "grad_norm": 1.746447438831052, "learning_rate": 9.639852403336118e-06, "loss": 0.6884, "step": 2311 }, { "epoch": 0.15, "grad_norm": 1.618955525741155, "learning_rate": 9.6394660273514e-06, "loss": 0.83, "step": 2312 }, { "epoch": 0.15, "grad_norm": 1.764743704896764, "learning_rate": 9.63907945197218e-06, "loss": 0.7098, "step": 2313 }, { "epoch": 0.15, "grad_norm": 1.550629532484094, "learning_rate": 9.638692677215074e-06, "loss": 0.8068, "step": 2314 }, { "epoch": 0.15, "grad_norm": 1.4705444799394547, "learning_rate": 9.638305703096702e-06, "loss": 0.8187, "step": 2315 }, { "epoch": 0.15, "grad_norm": 1.7032731997862025, "learning_rate": 9.637918529633699e-06, "loss": 0.705, "step": 2316 }, { "epoch": 0.15, "grad_norm": 1.6431651574322959, "learning_rate": 9.637531156842702e-06, "loss": 0.7238, "step": 2317 }, { "epoch": 0.15, "grad_norm": 1.7517435158797618, "learning_rate": 9.637143584740363e-06, "loss": 0.8088, "step": 2318 }, { "epoch": 0.15, "grad_norm": 1.6942887234551502, "learning_rate": 9.636755813343334e-06, "loss": 0.798, "step": 2319 }, { "epoch": 0.15, "grad_norm": 2.1468813526362345, "learning_rate": 9.636367842668284e-06, "loss": 0.7837, "step": 2320 }, { "epoch": 0.15, "grad_norm": 1.738046985197955, "learning_rate": 9.635979672731888e-06, "loss": 0.8128, "step": 2321 }, { "epoch": 0.15, "grad_norm": 1.337444006911769, "learning_rate": 9.635591303550826e-06, "loss": 0.6686, "step": 2322 }, { "epoch": 0.15, "grad_norm": 1.2277794078354067, "learning_rate": 9.635202735141792e-06, "loss": 0.738, "step": 2323 }, { "epoch": 0.15, "grad_norm": 1.4926850411817876, "learning_rate": 9.634813967521482e-06, "loss": 0.7615, "step": 2324 }, { "epoch": 0.15, "grad_norm": 1.4918843268331508, "learning_rate": 9.634425000706607e-06, "loss": 0.7399, "step": 2325 }, { "epoch": 0.15, "grad_norm": 1.646346541417021, "learning_rate": 9.634035834713885e-06, "loss": 0.6976, "step": 2326 }, { "epoch": 0.15, "grad_norm": 1.5355999385081027, "learning_rate": 9.633646469560039e-06, "loss": 0.6992, "step": 2327 }, { "epoch": 0.15, "grad_norm": 1.5219666815727162, "learning_rate": 9.633256905261805e-06, "loss": 0.7141, "step": 2328 }, { "epoch": 0.15, "grad_norm": 3.17234484094544, "learning_rate": 9.632867141835926e-06, "loss": 0.792, "step": 2329 }, { "epoch": 0.15, "grad_norm": 1.5066569639823615, "learning_rate": 9.632477179299152e-06, "loss": 0.7124, "step": 2330 }, { "epoch": 0.15, "grad_norm": 1.0712811631990973, "learning_rate": 9.632087017668242e-06, "loss": 0.6818, "step": 2331 }, { "epoch": 0.15, "grad_norm": 1.8078509124140718, "learning_rate": 9.631696656959966e-06, "loss": 0.7557, "step": 2332 }, { "epoch": 0.15, "grad_norm": 2.1787157218504087, "learning_rate": 9.6313060971911e-06, "loss": 0.7837, "step": 2333 }, { "epoch": 0.15, "grad_norm": 1.525541761842095, "learning_rate": 9.63091533837843e-06, "loss": 0.7983, "step": 2334 }, { "epoch": 0.15, "grad_norm": 1.606265255546906, "learning_rate": 9.630524380538748e-06, "loss": 0.7062, "step": 2335 }, { "epoch": 0.15, "grad_norm": 1.3825483507061105, "learning_rate": 9.63013322368886e-06, "loss": 0.6931, "step": 2336 }, { "epoch": 0.15, "grad_norm": 1.558840868697464, "learning_rate": 9.629741867845574e-06, "loss": 0.7116, "step": 2337 }, { "epoch": 0.15, "grad_norm": 1.75185457585759, "learning_rate": 9.629350313025711e-06, "loss": 0.8707, "step": 2338 }, { "epoch": 0.15, "grad_norm": 1.877062552957906, "learning_rate": 9.628958559246101e-06, "loss": 0.7761, "step": 2339 }, { "epoch": 0.15, "grad_norm": 2.5444881088670166, "learning_rate": 9.628566606523578e-06, "loss": 0.7197, "step": 2340 }, { "epoch": 0.15, "grad_norm": 1.3858087318352832, "learning_rate": 9.628174454874988e-06, "loss": 0.7568, "step": 2341 }, { "epoch": 0.15, "grad_norm": 2.0330670842928953, "learning_rate": 9.627782104317185e-06, "loss": 0.8345, "step": 2342 }, { "epoch": 0.15, "grad_norm": 1.5451579601322347, "learning_rate": 9.627389554867032e-06, "loss": 0.7995, "step": 2343 }, { "epoch": 0.15, "grad_norm": 1.5094332184107917, "learning_rate": 9.626996806541398e-06, "loss": 0.7466, "step": 2344 }, { "epoch": 0.15, "grad_norm": 1.7449197134010543, "learning_rate": 9.626603859357165e-06, "loss": 0.7435, "step": 2345 }, { "epoch": 0.15, "grad_norm": 1.225126319367994, "learning_rate": 9.62621071333122e-06, "loss": 0.6888, "step": 2346 }, { "epoch": 0.15, "grad_norm": 1.5908770010602051, "learning_rate": 9.625817368480459e-06, "loss": 0.6588, "step": 2347 }, { "epoch": 0.15, "grad_norm": 1.5643155461581482, "learning_rate": 9.625423824821789e-06, "loss": 0.6939, "step": 2348 }, { "epoch": 0.15, "grad_norm": 1.6365293444649878, "learning_rate": 9.625030082372122e-06, "loss": 0.7289, "step": 2349 }, { "epoch": 0.15, "grad_norm": 1.4067743294276869, "learning_rate": 9.624636141148377e-06, "loss": 0.7398, "step": 2350 }, { "epoch": 0.15, "grad_norm": 1.122660327265523, "learning_rate": 9.624242001167493e-06, "loss": 0.7001, "step": 2351 }, { "epoch": 0.15, "grad_norm": 2.18547572615925, "learning_rate": 9.623847662446404e-06, "loss": 0.708, "step": 2352 }, { "epoch": 0.15, "grad_norm": 1.6557469039090802, "learning_rate": 9.623453125002056e-06, "loss": 0.7732, "step": 2353 }, { "epoch": 0.15, "grad_norm": 1.079732419325808, "learning_rate": 9.62305838885141e-06, "loss": 0.6425, "step": 2354 }, { "epoch": 0.15, "grad_norm": 1.6078973710442157, "learning_rate": 9.622663454011429e-06, "loss": 0.8432, "step": 2355 }, { "epoch": 0.15, "grad_norm": 1.813638205051305, "learning_rate": 9.622268320499083e-06, "loss": 0.7702, "step": 2356 }, { "epoch": 0.15, "grad_norm": 1.721600117330945, "learning_rate": 9.621872988331362e-06, "loss": 0.7755, "step": 2357 }, { "epoch": 0.15, "grad_norm": 1.7481967025907301, "learning_rate": 9.62147745752525e-06, "loss": 0.7206, "step": 2358 }, { "epoch": 0.15, "grad_norm": 1.6375218628874695, "learning_rate": 9.621081728097747e-06, "loss": 0.7342, "step": 2359 }, { "epoch": 0.15, "grad_norm": 3.3387840980984085, "learning_rate": 9.620685800065861e-06, "loss": 0.6644, "step": 2360 }, { "epoch": 0.15, "grad_norm": 1.4551948008999476, "learning_rate": 9.620289673446611e-06, "loss": 0.7197, "step": 2361 }, { "epoch": 0.15, "grad_norm": 1.6290190102594868, "learning_rate": 9.619893348257019e-06, "loss": 0.7454, "step": 2362 }, { "epoch": 0.15, "grad_norm": 2.4037751752622034, "learning_rate": 9.619496824514118e-06, "loss": 0.7189, "step": 2363 }, { "epoch": 0.15, "grad_norm": 1.6288927323801885, "learning_rate": 9.61910010223495e-06, "loss": 0.7183, "step": 2364 }, { "epoch": 0.15, "grad_norm": 2.0681036058488753, "learning_rate": 9.618703181436566e-06, "loss": 0.7746, "step": 2365 }, { "epoch": 0.15, "grad_norm": 1.7500670069332218, "learning_rate": 9.618306062136025e-06, "loss": 0.7666, "step": 2366 }, { "epoch": 0.15, "grad_norm": 1.2014038508795526, "learning_rate": 9.617908744350392e-06, "loss": 0.6607, "step": 2367 }, { "epoch": 0.15, "grad_norm": 1.4196910107441167, "learning_rate": 9.617511228096746e-06, "loss": 0.7192, "step": 2368 }, { "epoch": 0.15, "grad_norm": 1.7030964008857434, "learning_rate": 9.61711351339217e-06, "loss": 0.6545, "step": 2369 }, { "epoch": 0.15, "grad_norm": 1.7098634722544792, "learning_rate": 9.616715600253759e-06, "loss": 0.7194, "step": 2370 }, { "epoch": 0.15, "grad_norm": 1.624393006925151, "learning_rate": 9.61631748869861e-06, "loss": 0.6868, "step": 2371 }, { "epoch": 0.15, "grad_norm": 2.080492869414, "learning_rate": 9.615919178743836e-06, "loss": 0.7935, "step": 2372 }, { "epoch": 0.15, "grad_norm": 2.0013663369394816, "learning_rate": 9.615520670406555e-06, "loss": 0.7484, "step": 2373 }, { "epoch": 0.15, "grad_norm": 1.5959156226054934, "learning_rate": 9.615121963703895e-06, "loss": 0.7414, "step": 2374 }, { "epoch": 0.15, "grad_norm": 1.6794912420619128, "learning_rate": 9.61472305865299e-06, "loss": 0.7039, "step": 2375 }, { "epoch": 0.15, "grad_norm": 1.5925699647905784, "learning_rate": 9.614323955270985e-06, "loss": 0.7714, "step": 2376 }, { "epoch": 0.15, "grad_norm": 1.6570855680943832, "learning_rate": 9.613924653575034e-06, "loss": 0.763, "step": 2377 }, { "epoch": 0.15, "grad_norm": 1.6658840965349595, "learning_rate": 9.613525153582295e-06, "loss": 0.7092, "step": 2378 }, { "epoch": 0.15, "grad_norm": 1.9131426616641698, "learning_rate": 9.61312545530994e-06, "loss": 0.692, "step": 2379 }, { "epoch": 0.15, "grad_norm": 1.62367336972185, "learning_rate": 9.612725558775144e-06, "loss": 0.7457, "step": 2380 }, { "epoch": 0.15, "grad_norm": 1.63940997924214, "learning_rate": 9.612325463995099e-06, "loss": 0.7394, "step": 2381 }, { "epoch": 0.15, "grad_norm": 1.3842279446569195, "learning_rate": 9.611925170986996e-06, "loss": 0.7775, "step": 2382 }, { "epoch": 0.15, "grad_norm": 1.6776169617702863, "learning_rate": 9.61152467976804e-06, "loss": 0.7176, "step": 2383 }, { "epoch": 0.15, "grad_norm": 1.509625240032546, "learning_rate": 9.611123990355445e-06, "loss": 0.7292, "step": 2384 }, { "epoch": 0.15, "grad_norm": 1.5610040240466845, "learning_rate": 9.610723102766429e-06, "loss": 0.7957, "step": 2385 }, { "epoch": 0.15, "grad_norm": 1.9262342419888283, "learning_rate": 9.610322017018224e-06, "loss": 0.7915, "step": 2386 }, { "epoch": 0.15, "grad_norm": 1.5355258415478166, "learning_rate": 9.609920733128064e-06, "loss": 0.7891, "step": 2387 }, { "epoch": 0.15, "grad_norm": 1.6149067424637134, "learning_rate": 9.609519251113199e-06, "loss": 0.7013, "step": 2388 }, { "epoch": 0.15, "grad_norm": 1.9954864048860483, "learning_rate": 9.609117570990882e-06, "loss": 0.7706, "step": 2389 }, { "epoch": 0.15, "grad_norm": 1.8038727474605365, "learning_rate": 9.608715692778377e-06, "loss": 0.6891, "step": 2390 }, { "epoch": 0.15, "grad_norm": 1.7165482789801052, "learning_rate": 9.608313616492954e-06, "loss": 0.8927, "step": 2391 }, { "epoch": 0.15, "grad_norm": 1.612070733030958, "learning_rate": 9.607911342151898e-06, "loss": 0.7253, "step": 2392 }, { "epoch": 0.15, "grad_norm": 1.5946366634220848, "learning_rate": 9.607508869772495e-06, "loss": 0.7691, "step": 2393 }, { "epoch": 0.15, "grad_norm": 1.6210577577253673, "learning_rate": 9.60710619937204e-06, "loss": 0.6962, "step": 2394 }, { "epoch": 0.15, "grad_norm": 2.066967650071907, "learning_rate": 9.606703330967843e-06, "loss": 0.7968, "step": 2395 }, { "epoch": 0.15, "grad_norm": 1.9757933780581691, "learning_rate": 9.606300264577217e-06, "loss": 0.6671, "step": 2396 }, { "epoch": 0.15, "grad_norm": 1.4748634983621247, "learning_rate": 9.605897000217485e-06, "loss": 0.7069, "step": 2397 }, { "epoch": 0.15, "grad_norm": 1.820193209724446, "learning_rate": 9.605493537905978e-06, "loss": 0.6839, "step": 2398 }, { "epoch": 0.15, "grad_norm": 1.4955979007643014, "learning_rate": 9.605089877660036e-06, "loss": 0.7009, "step": 2399 }, { "epoch": 0.15, "grad_norm": 1.344131342432567, "learning_rate": 9.604686019497008e-06, "loss": 0.721, "step": 2400 }, { "epoch": 0.15, "grad_norm": 1.590752967452006, "learning_rate": 9.60428196343425e-06, "loss": 0.7321, "step": 2401 }, { "epoch": 0.15, "grad_norm": 1.5937260134989624, "learning_rate": 9.603877709489128e-06, "loss": 0.7084, "step": 2402 }, { "epoch": 0.15, "grad_norm": 1.4475498021556212, "learning_rate": 9.603473257679018e-06, "loss": 0.677, "step": 2403 }, { "epoch": 0.15, "grad_norm": 1.3974938380028836, "learning_rate": 9.6030686080213e-06, "loss": 0.7262, "step": 2404 }, { "epoch": 0.15, "grad_norm": 2.07554513296464, "learning_rate": 9.602663760533364e-06, "loss": 0.7941, "step": 2405 }, { "epoch": 0.15, "grad_norm": 1.4667789544631573, "learning_rate": 9.602258715232611e-06, "loss": 0.7332, "step": 2406 }, { "epoch": 0.15, "grad_norm": 1.6941938000844858, "learning_rate": 9.601853472136451e-06, "loss": 0.83, "step": 2407 }, { "epoch": 0.15, "grad_norm": 1.7104087798462322, "learning_rate": 9.601448031262298e-06, "loss": 0.7028, "step": 2408 }, { "epoch": 0.15, "grad_norm": 1.7964385432840178, "learning_rate": 9.601042392627577e-06, "loss": 0.8145, "step": 2409 }, { "epoch": 0.15, "grad_norm": 1.175967967227646, "learning_rate": 9.600636556249722e-06, "loss": 0.5797, "step": 2410 }, { "epoch": 0.15, "grad_norm": 1.455815655660089, "learning_rate": 9.600230522146176e-06, "loss": 0.7872, "step": 2411 }, { "epoch": 0.15, "grad_norm": 1.6721602381246456, "learning_rate": 9.599824290334388e-06, "loss": 0.7862, "step": 2412 }, { "epoch": 0.15, "grad_norm": 1.7706447455398422, "learning_rate": 9.599417860831818e-06, "loss": 0.8089, "step": 2413 }, { "epoch": 0.15, "grad_norm": 1.3996806568408344, "learning_rate": 9.599011233655933e-06, "loss": 0.744, "step": 2414 }, { "epoch": 0.15, "grad_norm": 1.5374402777737595, "learning_rate": 9.598604408824209e-06, "loss": 0.7372, "step": 2415 }, { "epoch": 0.15, "grad_norm": 1.481774285326487, "learning_rate": 9.598197386354129e-06, "loss": 0.6973, "step": 2416 }, { "epoch": 0.15, "grad_norm": 1.6419254090314475, "learning_rate": 9.59779016626319e-06, "loss": 0.8366, "step": 2417 }, { "epoch": 0.15, "grad_norm": 1.8012960190748184, "learning_rate": 9.597382748568889e-06, "loss": 0.8269, "step": 2418 }, { "epoch": 0.15, "grad_norm": 1.614488368048763, "learning_rate": 9.596975133288738e-06, "loss": 0.6943, "step": 2419 }, { "epoch": 0.15, "grad_norm": 1.5743358046772584, "learning_rate": 9.596567320440255e-06, "loss": 0.7237, "step": 2420 }, { "epoch": 0.15, "grad_norm": 1.0875070077568163, "learning_rate": 9.596159310040968e-06, "loss": 0.6603, "step": 2421 }, { "epoch": 0.16, "grad_norm": 1.3468770884602774, "learning_rate": 9.595751102108412e-06, "loss": 0.7381, "step": 2422 }, { "epoch": 0.16, "grad_norm": 1.6656911964285077, "learning_rate": 9.595342696660133e-06, "loss": 0.7419, "step": 2423 }, { "epoch": 0.16, "grad_norm": 1.6147062538161547, "learning_rate": 9.594934093713677e-06, "loss": 0.7087, "step": 2424 }, { "epoch": 0.16, "grad_norm": 1.8555705831329472, "learning_rate": 9.594525293286611e-06, "loss": 0.8082, "step": 2425 }, { "epoch": 0.16, "grad_norm": 1.2180337353362902, "learning_rate": 9.594116295396502e-06, "loss": 0.6603, "step": 2426 }, { "epoch": 0.16, "grad_norm": 1.732836369823613, "learning_rate": 9.593707100060927e-06, "loss": 0.7749, "step": 2427 }, { "epoch": 0.16, "grad_norm": 1.6608441771901707, "learning_rate": 9.593297707297475e-06, "loss": 0.7303, "step": 2428 }, { "epoch": 0.16, "grad_norm": 1.5120072764563464, "learning_rate": 9.59288811712374e-06, "loss": 0.7313, "step": 2429 }, { "epoch": 0.16, "grad_norm": 1.466708226659425, "learning_rate": 9.592478329557323e-06, "loss": 0.7842, "step": 2430 }, { "epoch": 0.16, "grad_norm": 1.1091844056684186, "learning_rate": 9.592068344615837e-06, "loss": 0.6158, "step": 2431 }, { "epoch": 0.16, "grad_norm": 1.8129129587488113, "learning_rate": 9.591658162316905e-06, "loss": 0.7944, "step": 2432 }, { "epoch": 0.16, "grad_norm": 1.5582162607642491, "learning_rate": 9.591247782678153e-06, "loss": 0.7552, "step": 2433 }, { "epoch": 0.16, "grad_norm": 1.5476027148183973, "learning_rate": 9.590837205717219e-06, "loss": 0.7837, "step": 2434 }, { "epoch": 0.16, "grad_norm": 1.544749091775404, "learning_rate": 9.590426431451748e-06, "loss": 0.7541, "step": 2435 }, { "epoch": 0.16, "grad_norm": 1.5557869194156169, "learning_rate": 9.590015459899394e-06, "loss": 0.7826, "step": 2436 }, { "epoch": 0.16, "grad_norm": 1.673127557362808, "learning_rate": 9.58960429107782e-06, "loss": 0.7818, "step": 2437 }, { "epoch": 0.16, "grad_norm": 1.8502416974544635, "learning_rate": 9.5891929250047e-06, "loss": 0.7212, "step": 2438 }, { "epoch": 0.16, "grad_norm": 1.2894328141647595, "learning_rate": 9.58878136169771e-06, "loss": 0.6275, "step": 2439 }, { "epoch": 0.16, "grad_norm": 1.6651864583394946, "learning_rate": 9.58836960117454e-06, "loss": 0.7102, "step": 2440 }, { "epoch": 0.16, "grad_norm": 2.132728541690629, "learning_rate": 9.587957643452886e-06, "loss": 0.7594, "step": 2441 }, { "epoch": 0.16, "grad_norm": 1.8248524148342185, "learning_rate": 9.587545488550453e-06, "loss": 0.8064, "step": 2442 }, { "epoch": 0.16, "grad_norm": 1.5719146229184593, "learning_rate": 9.587133136484953e-06, "loss": 0.8591, "step": 2443 }, { "epoch": 0.16, "grad_norm": 1.5962539976237131, "learning_rate": 9.586720587274113e-06, "loss": 0.7396, "step": 2444 }, { "epoch": 0.16, "grad_norm": 1.5880937499575865, "learning_rate": 9.586307840935658e-06, "loss": 0.6878, "step": 2445 }, { "epoch": 0.16, "grad_norm": 1.7377733543008584, "learning_rate": 9.585894897487328e-06, "loss": 0.7451, "step": 2446 }, { "epoch": 0.16, "grad_norm": 1.5011547688326041, "learning_rate": 9.585481756946875e-06, "loss": 0.7305, "step": 2447 }, { "epoch": 0.16, "grad_norm": 1.3440092528575156, "learning_rate": 9.58506841933205e-06, "loss": 0.6414, "step": 2448 }, { "epoch": 0.16, "grad_norm": 2.3192060884670886, "learning_rate": 9.584654884660618e-06, "loss": 0.7602, "step": 2449 }, { "epoch": 0.16, "grad_norm": 1.637253043164758, "learning_rate": 9.584241152950353e-06, "loss": 0.6934, "step": 2450 }, { "epoch": 0.16, "grad_norm": 3.1276694184358433, "learning_rate": 9.583827224219035e-06, "loss": 0.73, "step": 2451 }, { "epoch": 0.16, "grad_norm": 1.6415930428706658, "learning_rate": 9.583413098484457e-06, "loss": 0.6779, "step": 2452 }, { "epoch": 0.16, "grad_norm": 1.9115649594959871, "learning_rate": 9.582998775764414e-06, "loss": 0.7888, "step": 2453 }, { "epoch": 0.16, "grad_norm": 1.493909481010363, "learning_rate": 9.582584256076715e-06, "loss": 0.7928, "step": 2454 }, { "epoch": 0.16, "grad_norm": 1.459667398345136, "learning_rate": 9.582169539439173e-06, "loss": 0.7098, "step": 2455 }, { "epoch": 0.16, "grad_norm": 1.5594121054698669, "learning_rate": 9.581754625869612e-06, "loss": 0.7007, "step": 2456 }, { "epoch": 0.16, "grad_norm": 1.7863695656524123, "learning_rate": 9.581339515385866e-06, "loss": 0.8196, "step": 2457 }, { "epoch": 0.16, "grad_norm": 1.5086230523714954, "learning_rate": 9.580924208005775e-06, "loss": 0.706, "step": 2458 }, { "epoch": 0.16, "grad_norm": 1.6513987408864634, "learning_rate": 9.580508703747185e-06, "loss": 0.7913, "step": 2459 }, { "epoch": 0.16, "grad_norm": 1.7259956178894966, "learning_rate": 9.580093002627958e-06, "loss": 0.7435, "step": 2460 }, { "epoch": 0.16, "grad_norm": 1.8296176727707607, "learning_rate": 9.579677104665957e-06, "loss": 0.7303, "step": 2461 }, { "epoch": 0.16, "grad_norm": 1.9194489930448213, "learning_rate": 9.579261009879057e-06, "loss": 0.749, "step": 2462 }, { "epoch": 0.16, "grad_norm": 1.1647899046560979, "learning_rate": 9.578844718285141e-06, "loss": 0.7203, "step": 2463 }, { "epoch": 0.16, "grad_norm": 1.8978075753465895, "learning_rate": 9.578428229902102e-06, "loss": 0.7727, "step": 2464 }, { "epoch": 0.16, "grad_norm": 1.5728082324068469, "learning_rate": 9.578011544747836e-06, "loss": 0.6812, "step": 2465 }, { "epoch": 0.16, "grad_norm": 1.7898006681861167, "learning_rate": 9.577594662840256e-06, "loss": 0.7989, "step": 2466 }, { "epoch": 0.16, "grad_norm": 1.5983137189595689, "learning_rate": 9.577177584197274e-06, "loss": 0.6561, "step": 2467 }, { "epoch": 0.16, "grad_norm": 1.670997123853677, "learning_rate": 9.576760308836819e-06, "loss": 0.7363, "step": 2468 }, { "epoch": 0.16, "grad_norm": 1.6359841736287313, "learning_rate": 9.576342836776822e-06, "loss": 0.7723, "step": 2469 }, { "epoch": 0.16, "grad_norm": 2.093378740144257, "learning_rate": 9.575925168035225e-06, "loss": 0.7817, "step": 2470 }, { "epoch": 0.16, "grad_norm": 1.1403376895271635, "learning_rate": 9.575507302629982e-06, "loss": 0.6687, "step": 2471 }, { "epoch": 0.16, "grad_norm": 1.6158028897243253, "learning_rate": 9.57508924057905e-06, "loss": 0.719, "step": 2472 }, { "epoch": 0.16, "grad_norm": 1.845379996817796, "learning_rate": 9.574670981900394e-06, "loss": 0.823, "step": 2473 }, { "epoch": 0.16, "grad_norm": 1.6363583105774082, "learning_rate": 9.574252526611994e-06, "loss": 0.7458, "step": 2474 }, { "epoch": 0.16, "grad_norm": 1.5129239183728354, "learning_rate": 9.57383387473183e-06, "loss": 0.6695, "step": 2475 }, { "epoch": 0.16, "grad_norm": 1.7341343358256658, "learning_rate": 9.573415026277896e-06, "loss": 0.8382, "step": 2476 }, { "epoch": 0.16, "grad_norm": 1.152837959661525, "learning_rate": 9.572995981268199e-06, "loss": 0.6208, "step": 2477 }, { "epoch": 0.16, "grad_norm": 1.572480044867894, "learning_rate": 9.57257673972074e-06, "loss": 0.7072, "step": 2478 }, { "epoch": 0.16, "grad_norm": 2.172707203363361, "learning_rate": 9.572157301653542e-06, "loss": 0.8697, "step": 2479 }, { "epoch": 0.16, "grad_norm": 1.7039695279706502, "learning_rate": 9.571737667084631e-06, "loss": 0.751, "step": 2480 }, { "epoch": 0.16, "grad_norm": 1.5351057359553706, "learning_rate": 9.571317836032042e-06, "loss": 0.706, "step": 2481 }, { "epoch": 0.16, "grad_norm": 1.627307403965839, "learning_rate": 9.570897808513818e-06, "loss": 0.7727, "step": 2482 }, { "epoch": 0.16, "grad_norm": 1.7830271532637785, "learning_rate": 9.570477584548008e-06, "loss": 0.79, "step": 2483 }, { "epoch": 0.16, "grad_norm": 1.7052878139902092, "learning_rate": 9.570057164152679e-06, "loss": 0.7896, "step": 2484 }, { "epoch": 0.16, "grad_norm": 1.4526476855346426, "learning_rate": 9.569636547345895e-06, "loss": 0.7325, "step": 2485 }, { "epoch": 0.16, "grad_norm": 1.5416537375794583, "learning_rate": 9.569215734145733e-06, "loss": 0.7158, "step": 2486 }, { "epoch": 0.16, "grad_norm": 1.9173081720082354, "learning_rate": 9.568794724570282e-06, "loss": 0.792, "step": 2487 }, { "epoch": 0.16, "grad_norm": 1.2074032902810923, "learning_rate": 9.568373518637632e-06, "loss": 0.651, "step": 2488 }, { "epoch": 0.16, "grad_norm": 2.025391814157172, "learning_rate": 9.567952116365889e-06, "loss": 0.7508, "step": 2489 }, { "epoch": 0.16, "grad_norm": 1.7419314897493805, "learning_rate": 9.567530517773163e-06, "loss": 0.5978, "step": 2490 }, { "epoch": 0.16, "grad_norm": 2.1367449058613275, "learning_rate": 9.567108722877572e-06, "loss": 0.7024, "step": 2491 }, { "epoch": 0.16, "grad_norm": 1.1217053057649513, "learning_rate": 9.566686731697246e-06, "loss": 0.6653, "step": 2492 }, { "epoch": 0.16, "grad_norm": 1.4808283747448565, "learning_rate": 9.566264544250319e-06, "loss": 0.8007, "step": 2493 }, { "epoch": 0.16, "grad_norm": 1.5886811160340755, "learning_rate": 9.565842160554938e-06, "loss": 0.7538, "step": 2494 }, { "epoch": 0.16, "grad_norm": 1.557529150620196, "learning_rate": 9.565419580629254e-06, "loss": 0.7944, "step": 2495 }, { "epoch": 0.16, "grad_norm": 1.6060528764764803, "learning_rate": 9.56499680449143e-06, "loss": 0.7139, "step": 2496 }, { "epoch": 0.16, "grad_norm": 1.5797558035978998, "learning_rate": 9.564573832159638e-06, "loss": 0.6881, "step": 2497 }, { "epoch": 0.16, "grad_norm": 1.4187022760395207, "learning_rate": 9.564150663652053e-06, "loss": 0.6929, "step": 2498 }, { "epoch": 0.16, "grad_norm": 1.7772825324187649, "learning_rate": 9.56372729898686e-06, "loss": 0.8641, "step": 2499 }, { "epoch": 0.16, "grad_norm": 1.5466200160876904, "learning_rate": 9.56330373818226e-06, "loss": 0.7234, "step": 2500 }, { "epoch": 0.16, "grad_norm": 1.8979007371622725, "learning_rate": 9.562879981256455e-06, "loss": 0.8602, "step": 2501 }, { "epoch": 0.16, "grad_norm": 1.588320653825185, "learning_rate": 9.562456028227654e-06, "loss": 0.6794, "step": 2502 }, { "epoch": 0.16, "grad_norm": 1.145909173372835, "learning_rate": 9.562031879114082e-06, "loss": 0.7217, "step": 2503 }, { "epoch": 0.16, "grad_norm": 1.7271636288768366, "learning_rate": 9.561607533933965e-06, "loss": 0.8121, "step": 2504 }, { "epoch": 0.16, "grad_norm": 1.4897208935163688, "learning_rate": 9.561182992705541e-06, "loss": 0.7384, "step": 2505 }, { "epoch": 0.16, "grad_norm": 1.2091841069512126, "learning_rate": 9.560758255447058e-06, "loss": 0.7436, "step": 2506 }, { "epoch": 0.16, "grad_norm": 1.7315578025888125, "learning_rate": 9.560333322176767e-06, "loss": 0.7933, "step": 2507 }, { "epoch": 0.16, "grad_norm": 1.6594477103024667, "learning_rate": 9.559908192912933e-06, "loss": 0.7457, "step": 2508 }, { "epoch": 0.16, "grad_norm": 1.5924321839958262, "learning_rate": 9.559482867673825e-06, "loss": 0.7395, "step": 2509 }, { "epoch": 0.16, "grad_norm": 1.8849867284275528, "learning_rate": 9.559057346477726e-06, "loss": 0.7995, "step": 2510 }, { "epoch": 0.16, "grad_norm": 1.482287261932715, "learning_rate": 9.558631629342922e-06, "loss": 0.7468, "step": 2511 }, { "epoch": 0.16, "grad_norm": 1.6534171853695436, "learning_rate": 9.558205716287711e-06, "loss": 0.6576, "step": 2512 }, { "epoch": 0.16, "grad_norm": 1.6705492518583431, "learning_rate": 9.557779607330393e-06, "loss": 0.8454, "step": 2513 }, { "epoch": 0.16, "grad_norm": 1.5440986376325985, "learning_rate": 9.557353302489286e-06, "loss": 0.7656, "step": 2514 }, { "epoch": 0.16, "grad_norm": 1.6508272073504382, "learning_rate": 9.556926801782714e-06, "loss": 0.7396, "step": 2515 }, { "epoch": 0.16, "grad_norm": 1.6229727110503482, "learning_rate": 9.556500105229e-06, "loss": 0.7449, "step": 2516 }, { "epoch": 0.16, "grad_norm": 1.6213989039635373, "learning_rate": 9.556073212846485e-06, "loss": 0.7722, "step": 2517 }, { "epoch": 0.16, "grad_norm": 2.046644510893559, "learning_rate": 9.555646124653519e-06, "loss": 0.7236, "step": 2518 }, { "epoch": 0.16, "grad_norm": 1.5225714210190238, "learning_rate": 9.555218840668454e-06, "loss": 0.7835, "step": 2519 }, { "epoch": 0.16, "grad_norm": 1.5637415907217183, "learning_rate": 9.554791360909657e-06, "loss": 0.7269, "step": 2520 }, { "epoch": 0.16, "grad_norm": 2.211910040120327, "learning_rate": 9.554363685395496e-06, "loss": 0.8015, "step": 2521 }, { "epoch": 0.16, "grad_norm": 1.6365682029933424, "learning_rate": 9.553935814144355e-06, "loss": 0.7797, "step": 2522 }, { "epoch": 0.16, "grad_norm": 1.0887975217125636, "learning_rate": 9.553507747174622e-06, "loss": 0.7581, "step": 2523 }, { "epoch": 0.16, "grad_norm": 1.5585180219634236, "learning_rate": 9.553079484504693e-06, "loss": 0.7081, "step": 2524 }, { "epoch": 0.16, "grad_norm": 1.7463015796800987, "learning_rate": 9.552651026152978e-06, "loss": 0.7686, "step": 2525 }, { "epoch": 0.16, "grad_norm": 1.6812448281589394, "learning_rate": 9.552222372137884e-06, "loss": 0.7189, "step": 2526 }, { "epoch": 0.16, "grad_norm": 1.621651542513145, "learning_rate": 9.551793522477842e-06, "loss": 0.7382, "step": 2527 }, { "epoch": 0.16, "grad_norm": 1.607828458969437, "learning_rate": 9.551364477191276e-06, "loss": 0.839, "step": 2528 }, { "epoch": 0.16, "grad_norm": 1.5258798801102682, "learning_rate": 9.55093523629663e-06, "loss": 0.6672, "step": 2529 }, { "epoch": 0.16, "grad_norm": 1.2188199223487122, "learning_rate": 9.550505799812351e-06, "loss": 0.6473, "step": 2530 }, { "epoch": 0.16, "grad_norm": 1.7134698860352877, "learning_rate": 9.550076167756892e-06, "loss": 0.7766, "step": 2531 }, { "epoch": 0.16, "grad_norm": 1.5253421551226787, "learning_rate": 9.549646340148725e-06, "loss": 0.716, "step": 2532 }, { "epoch": 0.16, "grad_norm": 1.7235944823024345, "learning_rate": 9.549216317006313e-06, "loss": 0.7531, "step": 2533 }, { "epoch": 0.16, "grad_norm": 1.4727400592772955, "learning_rate": 9.548786098348146e-06, "loss": 0.7906, "step": 2534 }, { "epoch": 0.16, "grad_norm": 1.9063402046742228, "learning_rate": 9.548355684192712e-06, "loss": 0.8164, "step": 2535 }, { "epoch": 0.16, "grad_norm": 1.6765865844277028, "learning_rate": 9.547925074558505e-06, "loss": 0.6995, "step": 2536 }, { "epoch": 0.16, "grad_norm": 1.6676867931435704, "learning_rate": 9.547494269464037e-06, "loss": 0.7522, "step": 2537 }, { "epoch": 0.16, "grad_norm": 1.5654352117865014, "learning_rate": 9.54706326892782e-06, "loss": 0.7835, "step": 2538 }, { "epoch": 0.16, "grad_norm": 1.727778301137537, "learning_rate": 9.546632072968379e-06, "loss": 0.7262, "step": 2539 }, { "epoch": 0.16, "grad_norm": 1.5790802311263916, "learning_rate": 9.546200681604243e-06, "loss": 0.7622, "step": 2540 }, { "epoch": 0.16, "grad_norm": 1.548457420711779, "learning_rate": 9.545769094853958e-06, "loss": 0.7009, "step": 2541 }, { "epoch": 0.16, "grad_norm": 1.7336720559767693, "learning_rate": 9.545337312736066e-06, "loss": 0.7801, "step": 2542 }, { "epoch": 0.16, "grad_norm": 1.783901247788135, "learning_rate": 9.54490533526913e-06, "loss": 0.8635, "step": 2543 }, { "epoch": 0.16, "grad_norm": 1.1783916612031604, "learning_rate": 9.544473162471713e-06, "loss": 0.6613, "step": 2544 }, { "epoch": 0.16, "grad_norm": 1.506394750854726, "learning_rate": 9.544040794362389e-06, "loss": 0.7515, "step": 2545 }, { "epoch": 0.16, "grad_norm": 1.2778383523131611, "learning_rate": 9.543608230959738e-06, "loss": 0.5959, "step": 2546 }, { "epoch": 0.16, "grad_norm": 1.6778336032851624, "learning_rate": 9.543175472282353e-06, "loss": 0.6764, "step": 2547 }, { "epoch": 0.16, "grad_norm": 1.4676376910923592, "learning_rate": 9.542742518348833e-06, "loss": 0.7749, "step": 2548 }, { "epoch": 0.16, "grad_norm": 1.5425789531602674, "learning_rate": 9.542309369177785e-06, "loss": 0.7082, "step": 2549 }, { "epoch": 0.16, "grad_norm": 1.5064698484257806, "learning_rate": 9.541876024787825e-06, "loss": 0.7677, "step": 2550 }, { "epoch": 0.16, "grad_norm": 1.1058394711307595, "learning_rate": 9.541442485197577e-06, "loss": 0.6707, "step": 2551 }, { "epoch": 0.16, "grad_norm": 1.1733947526191775, "learning_rate": 9.541008750425676e-06, "loss": 0.6355, "step": 2552 }, { "epoch": 0.16, "grad_norm": 1.3002171963625833, "learning_rate": 9.540574820490759e-06, "loss": 0.792, "step": 2553 }, { "epoch": 0.16, "grad_norm": 1.05403681871851, "learning_rate": 9.540140695411478e-06, "loss": 0.6837, "step": 2554 }, { "epoch": 0.16, "grad_norm": 1.4492705922448694, "learning_rate": 9.539706375206487e-06, "loss": 0.7417, "step": 2555 }, { "epoch": 0.16, "grad_norm": 1.5924326426765356, "learning_rate": 9.539271859894459e-06, "loss": 0.8508, "step": 2556 }, { "epoch": 0.16, "grad_norm": 1.6088541950210837, "learning_rate": 9.538837149494065e-06, "loss": 0.8102, "step": 2557 }, { "epoch": 0.16, "grad_norm": 1.6098867783465602, "learning_rate": 9.538402244023986e-06, "loss": 0.7197, "step": 2558 }, { "epoch": 0.16, "grad_norm": 2.1601609015781587, "learning_rate": 9.537967143502915e-06, "loss": 0.7285, "step": 2559 }, { "epoch": 0.16, "grad_norm": 1.1568149795109248, "learning_rate": 9.537531847949553e-06, "loss": 0.7331, "step": 2560 }, { "epoch": 0.16, "grad_norm": 1.676754599318091, "learning_rate": 9.537096357382606e-06, "loss": 0.7925, "step": 2561 }, { "epoch": 0.16, "grad_norm": 1.6031594281373938, "learning_rate": 9.53666067182079e-06, "loss": 0.7583, "step": 2562 }, { "epoch": 0.16, "grad_norm": 1.9904164818681946, "learning_rate": 9.536224791282834e-06, "loss": 0.839, "step": 2563 }, { "epoch": 0.16, "grad_norm": 1.8676751048090618, "learning_rate": 9.535788715787465e-06, "loss": 0.7811, "step": 2564 }, { "epoch": 0.16, "grad_norm": 1.5563026948511958, "learning_rate": 9.53535244535343e-06, "loss": 0.8461, "step": 2565 }, { "epoch": 0.16, "grad_norm": 1.737188018331867, "learning_rate": 9.534915979999476e-06, "loss": 0.7701, "step": 2566 }, { "epoch": 0.16, "grad_norm": 1.5998555479156398, "learning_rate": 9.534479319744366e-06, "loss": 0.8224, "step": 2567 }, { "epoch": 0.16, "grad_norm": 1.0637891207626824, "learning_rate": 9.534042464606859e-06, "loss": 0.5668, "step": 2568 }, { "epoch": 0.16, "grad_norm": 1.5579531094975312, "learning_rate": 9.533605414605736e-06, "loss": 0.7168, "step": 2569 }, { "epoch": 0.16, "grad_norm": 1.5175141280297066, "learning_rate": 9.533168169759778e-06, "loss": 0.6991, "step": 2570 }, { "epoch": 0.16, "grad_norm": 2.1773220276561647, "learning_rate": 9.532730730087779e-06, "loss": 0.8265, "step": 2571 }, { "epoch": 0.16, "grad_norm": 1.5813359995516123, "learning_rate": 9.532293095608535e-06, "loss": 0.7276, "step": 2572 }, { "epoch": 0.16, "grad_norm": 1.6030758340193527, "learning_rate": 9.53185526634086e-06, "loss": 0.8005, "step": 2573 }, { "epoch": 0.16, "grad_norm": 1.4605265093745727, "learning_rate": 9.531417242303566e-06, "loss": 0.7083, "step": 2574 }, { "epoch": 0.16, "grad_norm": 1.4559214615968, "learning_rate": 9.530979023515483e-06, "loss": 0.7066, "step": 2575 }, { "epoch": 0.16, "grad_norm": 3.3105224824905357, "learning_rate": 9.530540609995441e-06, "loss": 0.7951, "step": 2576 }, { "epoch": 0.16, "grad_norm": 1.5393462646768996, "learning_rate": 9.530102001762285e-06, "loss": 0.802, "step": 2577 }, { "epoch": 0.17, "grad_norm": 1.8536225505283785, "learning_rate": 9.529663198834862e-06, "loss": 0.7676, "step": 2578 }, { "epoch": 0.17, "grad_norm": 1.5332440724199454, "learning_rate": 9.529224201232034e-06, "loss": 0.7523, "step": 2579 }, { "epoch": 0.17, "grad_norm": 1.7221554859570398, "learning_rate": 9.528785008972667e-06, "loss": 0.7672, "step": 2580 }, { "epoch": 0.17, "grad_norm": 1.6144633265225217, "learning_rate": 9.528345622075636e-06, "loss": 0.7152, "step": 2581 }, { "epoch": 0.17, "grad_norm": 1.697606648297804, "learning_rate": 9.527906040559828e-06, "loss": 0.7086, "step": 2582 }, { "epoch": 0.17, "grad_norm": 1.4680272287009748, "learning_rate": 9.52746626444413e-06, "loss": 0.7093, "step": 2583 }, { "epoch": 0.17, "grad_norm": 1.712761478913779, "learning_rate": 9.527026293747446e-06, "loss": 0.7748, "step": 2584 }, { "epoch": 0.17, "grad_norm": 1.3972416104508953, "learning_rate": 9.526586128488686e-06, "loss": 0.7019, "step": 2585 }, { "epoch": 0.17, "grad_norm": 1.5137849256320297, "learning_rate": 9.526145768686765e-06, "loss": 0.821, "step": 2586 }, { "epoch": 0.17, "grad_norm": 1.0870868442063957, "learning_rate": 9.52570521436061e-06, "loss": 0.4953, "step": 2587 }, { "epoch": 0.17, "grad_norm": 1.7687035655261163, "learning_rate": 9.525264465529154e-06, "loss": 0.682, "step": 2588 }, { "epoch": 0.17, "grad_norm": 1.5334984112881145, "learning_rate": 9.52482352221134e-06, "loss": 0.7119, "step": 2589 }, { "epoch": 0.17, "grad_norm": 1.4805942635108558, "learning_rate": 9.524382384426119e-06, "loss": 0.7673, "step": 2590 }, { "epoch": 0.17, "grad_norm": 1.4635896060381905, "learning_rate": 9.52394105219245e-06, "loss": 0.7689, "step": 2591 }, { "epoch": 0.17, "grad_norm": 1.1155623762995035, "learning_rate": 9.523499525529302e-06, "loss": 0.6878, "step": 2592 }, { "epoch": 0.17, "grad_norm": 1.746324704940586, "learning_rate": 9.523057804455648e-06, "loss": 0.7588, "step": 2593 }, { "epoch": 0.17, "grad_norm": 1.3824101071893184, "learning_rate": 9.522615888990476e-06, "loss": 0.7854, "step": 2594 }, { "epoch": 0.17, "grad_norm": 1.453011058969075, "learning_rate": 9.522173779152773e-06, "loss": 0.7124, "step": 2595 }, { "epoch": 0.17, "grad_norm": 1.737445383731565, "learning_rate": 9.521731474961547e-06, "loss": 0.7714, "step": 2596 }, { "epoch": 0.17, "grad_norm": 1.6046880181259895, "learning_rate": 9.5212889764358e-06, "loss": 0.7624, "step": 2597 }, { "epoch": 0.17, "grad_norm": 1.7549782131996354, "learning_rate": 9.520846283594555e-06, "loss": 0.7542, "step": 2598 }, { "epoch": 0.17, "grad_norm": 1.594048194521305, "learning_rate": 9.520403396456838e-06, "loss": 0.7321, "step": 2599 }, { "epoch": 0.17, "grad_norm": 1.177151656724523, "learning_rate": 9.519960315041681e-06, "loss": 0.6158, "step": 2600 }, { "epoch": 0.17, "grad_norm": 1.3736441091405425, "learning_rate": 9.519517039368127e-06, "loss": 0.7241, "step": 2601 }, { "epoch": 0.17, "grad_norm": 1.6900627337940635, "learning_rate": 9.519073569455225e-06, "loss": 0.8039, "step": 2602 }, { "epoch": 0.17, "grad_norm": 1.936871218065415, "learning_rate": 9.518629905322041e-06, "loss": 0.6897, "step": 2603 }, { "epoch": 0.17, "grad_norm": 1.7851247654915476, "learning_rate": 9.518186046987636e-06, "loss": 0.6928, "step": 2604 }, { "epoch": 0.17, "grad_norm": 1.5558002282802454, "learning_rate": 9.517741994471091e-06, "loss": 0.7547, "step": 2605 }, { "epoch": 0.17, "grad_norm": 1.5681013986177699, "learning_rate": 9.517297747791485e-06, "loss": 0.877, "step": 2606 }, { "epoch": 0.17, "grad_norm": 1.588213956262727, "learning_rate": 9.516853306967917e-06, "loss": 0.9793, "step": 2607 }, { "epoch": 0.17, "grad_norm": 1.3202851063000667, "learning_rate": 9.516408672019482e-06, "loss": 0.7134, "step": 2608 }, { "epoch": 0.17, "grad_norm": 1.495187848572914, "learning_rate": 9.515963842965294e-06, "loss": 0.6723, "step": 2609 }, { "epoch": 0.17, "grad_norm": 2.612941562546837, "learning_rate": 9.51551881982447e-06, "loss": 0.7558, "step": 2610 }, { "epoch": 0.17, "grad_norm": 1.5803980247547078, "learning_rate": 9.515073602616135e-06, "loss": 0.8034, "step": 2611 }, { "epoch": 0.17, "grad_norm": 1.6129805020739225, "learning_rate": 9.514628191359426e-06, "loss": 0.7789, "step": 2612 }, { "epoch": 0.17, "grad_norm": 1.4211266679435854, "learning_rate": 9.51418258607348e-06, "loss": 0.7207, "step": 2613 }, { "epoch": 0.17, "grad_norm": 1.7299883138391627, "learning_rate": 9.513736786777455e-06, "loss": 0.6927, "step": 2614 }, { "epoch": 0.17, "grad_norm": 1.7112352078589756, "learning_rate": 9.513290793490506e-06, "loss": 0.7704, "step": 2615 }, { "epoch": 0.17, "grad_norm": 1.5160143353846325, "learning_rate": 9.512844606231804e-06, "loss": 0.7229, "step": 2616 }, { "epoch": 0.17, "grad_norm": 1.4934944932725485, "learning_rate": 9.512398225020523e-06, "loss": 0.7715, "step": 2617 }, { "epoch": 0.17, "grad_norm": 1.6344379044746016, "learning_rate": 9.511951649875846e-06, "loss": 0.6886, "step": 2618 }, { "epoch": 0.17, "grad_norm": 1.698569744993134, "learning_rate": 9.511504880816971e-06, "loss": 0.7518, "step": 2619 }, { "epoch": 0.17, "grad_norm": 1.6660244104891657, "learning_rate": 9.511057917863094e-06, "loss": 0.823, "step": 2620 }, { "epoch": 0.17, "grad_norm": 1.7426763188882664, "learning_rate": 9.510610761033427e-06, "loss": 0.7556, "step": 2621 }, { "epoch": 0.17, "grad_norm": 1.646729988196886, "learning_rate": 9.51016341034719e-06, "loss": 0.7759, "step": 2622 }, { "epoch": 0.17, "grad_norm": 1.719285097676622, "learning_rate": 9.509715865823605e-06, "loss": 0.7443, "step": 2623 }, { "epoch": 0.17, "grad_norm": 1.6407423582631553, "learning_rate": 9.509268127481907e-06, "loss": 0.6951, "step": 2624 }, { "epoch": 0.17, "grad_norm": 1.5547935333156657, "learning_rate": 9.508820195341343e-06, "loss": 0.7139, "step": 2625 }, { "epoch": 0.17, "grad_norm": 1.4988169140341048, "learning_rate": 9.508372069421159e-06, "loss": 0.7313, "step": 2626 }, { "epoch": 0.17, "grad_norm": 1.720119246044155, "learning_rate": 9.507923749740619e-06, "loss": 0.7131, "step": 2627 }, { "epoch": 0.17, "grad_norm": 1.498426267374263, "learning_rate": 9.507475236318987e-06, "loss": 0.6881, "step": 2628 }, { "epoch": 0.17, "grad_norm": 1.2967359905935072, "learning_rate": 9.50702652917554e-06, "loss": 0.6789, "step": 2629 }, { "epoch": 0.17, "grad_norm": 3.0266723134290543, "learning_rate": 9.506577628329566e-06, "loss": 0.7359, "step": 2630 }, { "epoch": 0.17, "grad_norm": 1.765436616410348, "learning_rate": 9.506128533800353e-06, "loss": 0.7773, "step": 2631 }, { "epoch": 0.17, "grad_norm": 2.139130255725122, "learning_rate": 9.505679245607205e-06, "loss": 0.8369, "step": 2632 }, { "epoch": 0.17, "grad_norm": 1.0811783314983194, "learning_rate": 9.505229763769432e-06, "loss": 0.6009, "step": 2633 }, { "epoch": 0.17, "grad_norm": 1.6867712590327137, "learning_rate": 9.504780088306349e-06, "loss": 0.6984, "step": 2634 }, { "epoch": 0.17, "grad_norm": 1.4039022914858417, "learning_rate": 9.504330219237284e-06, "loss": 0.6284, "step": 2635 }, { "epoch": 0.17, "grad_norm": 1.5508112242677077, "learning_rate": 9.503880156581571e-06, "loss": 0.6271, "step": 2636 }, { "epoch": 0.17, "grad_norm": 1.6763133032064752, "learning_rate": 9.503429900358554e-06, "loss": 0.8246, "step": 2637 }, { "epoch": 0.17, "grad_norm": 1.585194638535153, "learning_rate": 9.502979450587582e-06, "loss": 0.7779, "step": 2638 }, { "epoch": 0.17, "grad_norm": 1.4850229517459652, "learning_rate": 9.502528807288014e-06, "loss": 0.6829, "step": 2639 }, { "epoch": 0.17, "grad_norm": 1.4668910273150273, "learning_rate": 9.50207797047922e-06, "loss": 0.8052, "step": 2640 }, { "epoch": 0.17, "grad_norm": 1.6951678386691222, "learning_rate": 9.501626940180574e-06, "loss": 0.7083, "step": 2641 }, { "epoch": 0.17, "grad_norm": 1.1699313490111392, "learning_rate": 9.501175716411464e-06, "loss": 0.6391, "step": 2642 }, { "epoch": 0.17, "grad_norm": 1.6367819986131826, "learning_rate": 9.50072429919128e-06, "loss": 0.7044, "step": 2643 }, { "epoch": 0.17, "grad_norm": 1.9068620681988055, "learning_rate": 9.50027268853942e-06, "loss": 0.6951, "step": 2644 }, { "epoch": 0.17, "grad_norm": 1.7516020635276621, "learning_rate": 9.499820884475296e-06, "loss": 0.8114, "step": 2645 }, { "epoch": 0.17, "grad_norm": 1.8667667611544543, "learning_rate": 9.49936888701833e-06, "loss": 0.7433, "step": 2646 }, { "epoch": 0.17, "grad_norm": 1.637843129559601, "learning_rate": 9.498916696187942e-06, "loss": 0.7658, "step": 2647 }, { "epoch": 0.17, "grad_norm": 1.834683538629182, "learning_rate": 9.498464312003565e-06, "loss": 0.7246, "step": 2648 }, { "epoch": 0.17, "grad_norm": 1.5173293578721048, "learning_rate": 9.498011734484647e-06, "loss": 0.7896, "step": 2649 }, { "epoch": 0.17, "grad_norm": 1.474488359543994, "learning_rate": 9.497558963650635e-06, "loss": 0.7007, "step": 2650 }, { "epoch": 0.17, "grad_norm": 1.1852208043347203, "learning_rate": 9.49710599952099e-06, "loss": 0.6053, "step": 2651 }, { "epoch": 0.17, "grad_norm": 1.7587327026873245, "learning_rate": 9.49665284211518e-06, "loss": 0.71, "step": 2652 }, { "epoch": 0.17, "grad_norm": 1.7453630472019739, "learning_rate": 9.49619949145268e-06, "loss": 0.7969, "step": 2653 }, { "epoch": 0.17, "grad_norm": 1.5707804233126486, "learning_rate": 9.49574594755297e-06, "loss": 0.7824, "step": 2654 }, { "epoch": 0.17, "grad_norm": 1.4545688337396434, "learning_rate": 9.49529221043555e-06, "loss": 0.6742, "step": 2655 }, { "epoch": 0.17, "grad_norm": 1.5107650331722522, "learning_rate": 9.494838280119915e-06, "loss": 0.7477, "step": 2656 }, { "epoch": 0.17, "grad_norm": 1.3989031945427677, "learning_rate": 9.494384156625575e-06, "loss": 0.6962, "step": 2657 }, { "epoch": 0.17, "grad_norm": 1.617086545852062, "learning_rate": 9.493929839972048e-06, "loss": 0.7052, "step": 2658 }, { "epoch": 0.17, "grad_norm": 1.8419105479051505, "learning_rate": 9.49347533017886e-06, "loss": 0.7293, "step": 2659 }, { "epoch": 0.17, "grad_norm": 1.4792761775452818, "learning_rate": 9.493020627265545e-06, "loss": 0.7351, "step": 2660 }, { "epoch": 0.17, "grad_norm": 1.5803167747319913, "learning_rate": 9.492565731251645e-06, "loss": 0.7377, "step": 2661 }, { "epoch": 0.17, "grad_norm": 1.8481275148269753, "learning_rate": 9.492110642156708e-06, "loss": 0.7692, "step": 2662 }, { "epoch": 0.17, "grad_norm": 1.4672005787070608, "learning_rate": 9.491655360000298e-06, "loss": 0.6653, "step": 2663 }, { "epoch": 0.17, "grad_norm": 1.4478880471881046, "learning_rate": 9.491199884801976e-06, "loss": 0.7154, "step": 2664 }, { "epoch": 0.17, "grad_norm": 1.5718818799977086, "learning_rate": 9.490744216581323e-06, "loss": 0.8154, "step": 2665 }, { "epoch": 0.17, "grad_norm": 1.048126587096383, "learning_rate": 9.490288355357918e-06, "loss": 0.6415, "step": 2666 }, { "epoch": 0.17, "grad_norm": 1.8558472382235711, "learning_rate": 9.489832301151354e-06, "loss": 0.8324, "step": 2667 }, { "epoch": 0.17, "grad_norm": 1.6071552690841138, "learning_rate": 9.489376053981234e-06, "loss": 0.7734, "step": 2668 }, { "epoch": 0.17, "grad_norm": 1.7644971814129182, "learning_rate": 9.488919613867162e-06, "loss": 0.7038, "step": 2669 }, { "epoch": 0.17, "grad_norm": 1.5888502197846406, "learning_rate": 9.48846298082876e-06, "loss": 0.6839, "step": 2670 }, { "epoch": 0.17, "grad_norm": 1.848643701584465, "learning_rate": 9.48800615488565e-06, "loss": 0.7047, "step": 2671 }, { "epoch": 0.17, "grad_norm": 1.6943992636458387, "learning_rate": 9.487549136057467e-06, "loss": 0.8331, "step": 2672 }, { "epoch": 0.17, "grad_norm": 1.7244130285123387, "learning_rate": 9.487091924363852e-06, "loss": 0.7691, "step": 2673 }, { "epoch": 0.17, "grad_norm": 1.5065177829098364, "learning_rate": 9.486634519824453e-06, "loss": 0.6924, "step": 2674 }, { "epoch": 0.17, "grad_norm": 1.5228324901328179, "learning_rate": 9.486176922458929e-06, "loss": 0.6768, "step": 2675 }, { "epoch": 0.17, "grad_norm": 1.4294468072457973, "learning_rate": 9.48571913228695e-06, "loss": 0.7443, "step": 2676 }, { "epoch": 0.17, "grad_norm": 1.4287202297105361, "learning_rate": 9.485261149328189e-06, "loss": 0.6974, "step": 2677 }, { "epoch": 0.17, "grad_norm": 1.5248204503952316, "learning_rate": 9.484802973602328e-06, "loss": 0.7315, "step": 2678 }, { "epoch": 0.17, "grad_norm": 1.0925788758017068, "learning_rate": 9.484344605129057e-06, "loss": 0.5951, "step": 2679 }, { "epoch": 0.17, "grad_norm": 1.268561068068901, "learning_rate": 9.483886043928078e-06, "loss": 0.6431, "step": 2680 }, { "epoch": 0.17, "grad_norm": 1.0893228869007985, "learning_rate": 9.4834272900191e-06, "loss": 0.8381, "step": 2681 }, { "epoch": 0.17, "grad_norm": 1.5701687688027952, "learning_rate": 9.48296834342184e-06, "loss": 0.7794, "step": 2682 }, { "epoch": 0.17, "grad_norm": 1.5022528551474574, "learning_rate": 9.482509204156019e-06, "loss": 0.8227, "step": 2683 }, { "epoch": 0.17, "grad_norm": 6.702395415012941, "learning_rate": 9.48204987224137e-06, "loss": 0.6906, "step": 2684 }, { "epoch": 0.17, "grad_norm": 1.4533303879224586, "learning_rate": 9.481590347697638e-06, "loss": 0.6503, "step": 2685 }, { "epoch": 0.17, "grad_norm": 1.7532690380372473, "learning_rate": 9.481130630544569e-06, "loss": 0.7826, "step": 2686 }, { "epoch": 0.17, "grad_norm": 2.3254375086992636, "learning_rate": 9.480670720801921e-06, "loss": 0.7635, "step": 2687 }, { "epoch": 0.17, "grad_norm": 1.6084005996779993, "learning_rate": 9.48021061848946e-06, "loss": 0.7552, "step": 2688 }, { "epoch": 0.17, "grad_norm": 1.6738677982200123, "learning_rate": 9.479750323626963e-06, "loss": 0.7103, "step": 2689 }, { "epoch": 0.17, "grad_norm": 1.5911906381765049, "learning_rate": 9.47928983623421e-06, "loss": 0.7758, "step": 2690 }, { "epoch": 0.17, "grad_norm": 1.6160007023650886, "learning_rate": 9.47882915633099e-06, "loss": 0.7262, "step": 2691 }, { "epoch": 0.17, "grad_norm": 1.6846855168743444, "learning_rate": 9.478368283937106e-06, "loss": 0.7576, "step": 2692 }, { "epoch": 0.17, "grad_norm": 1.481490758203406, "learning_rate": 9.477907219072362e-06, "loss": 0.6824, "step": 2693 }, { "epoch": 0.17, "grad_norm": 1.522769300011839, "learning_rate": 9.477445961756577e-06, "loss": 0.7052, "step": 2694 }, { "epoch": 0.17, "grad_norm": 1.5874377117125074, "learning_rate": 9.476984512009572e-06, "loss": 0.7487, "step": 2695 }, { "epoch": 0.17, "grad_norm": 1.555290088724752, "learning_rate": 9.47652286985118e-06, "loss": 0.7422, "step": 2696 }, { "epoch": 0.17, "grad_norm": 1.5447158882397587, "learning_rate": 9.476061035301242e-06, "loss": 0.7775, "step": 2697 }, { "epoch": 0.17, "grad_norm": 1.5858020171953826, "learning_rate": 9.475599008379606e-06, "loss": 0.7169, "step": 2698 }, { "epoch": 0.17, "grad_norm": 1.7598500142849982, "learning_rate": 9.475136789106128e-06, "loss": 0.7456, "step": 2699 }, { "epoch": 0.17, "grad_norm": 1.7657907586607298, "learning_rate": 9.474674377500677e-06, "loss": 0.6793, "step": 2700 }, { "epoch": 0.17, "grad_norm": 1.5729454637037132, "learning_rate": 9.474211773583122e-06, "loss": 0.7648, "step": 2701 }, { "epoch": 0.17, "grad_norm": 1.541618020186174, "learning_rate": 9.473748977373346e-06, "loss": 0.7285, "step": 2702 }, { "epoch": 0.17, "grad_norm": 1.7754585255034956, "learning_rate": 9.47328598889124e-06, "loss": 0.7712, "step": 2703 }, { "epoch": 0.17, "grad_norm": 1.514668050351204, "learning_rate": 9.472822808156704e-06, "loss": 0.6487, "step": 2704 }, { "epoch": 0.17, "grad_norm": 1.100242866647283, "learning_rate": 9.47235943518964e-06, "loss": 0.6473, "step": 2705 }, { "epoch": 0.17, "grad_norm": 1.9280714953917597, "learning_rate": 9.471895870009966e-06, "loss": 0.7297, "step": 2706 }, { "epoch": 0.17, "grad_norm": 1.4679546888942492, "learning_rate": 9.471432112637604e-06, "loss": 0.6871, "step": 2707 }, { "epoch": 0.17, "grad_norm": 1.5959349590119356, "learning_rate": 9.470968163092486e-06, "loss": 0.7906, "step": 2708 }, { "epoch": 0.17, "grad_norm": 1.393443800653132, "learning_rate": 9.47050402139455e-06, "loss": 0.6615, "step": 2709 }, { "epoch": 0.17, "grad_norm": 1.4910060826303528, "learning_rate": 9.470039687563747e-06, "loss": 0.8635, "step": 2710 }, { "epoch": 0.17, "grad_norm": 1.5550180423423334, "learning_rate": 9.469575161620029e-06, "loss": 0.7226, "step": 2711 }, { "epoch": 0.17, "grad_norm": 1.695983767718332, "learning_rate": 9.469110443583363e-06, "loss": 0.7109, "step": 2712 }, { "epoch": 0.17, "grad_norm": 2.108719007144646, "learning_rate": 9.468645533473721e-06, "loss": 0.7669, "step": 2713 }, { "epoch": 0.17, "grad_norm": 1.462161694452234, "learning_rate": 9.468180431311085e-06, "loss": 0.684, "step": 2714 }, { "epoch": 0.17, "grad_norm": 1.607923102372329, "learning_rate": 9.467715137115442e-06, "loss": 0.8058, "step": 2715 }, { "epoch": 0.17, "grad_norm": 1.6801116596323717, "learning_rate": 9.467249650906792e-06, "loss": 0.8672, "step": 2716 }, { "epoch": 0.17, "grad_norm": 1.5119091226580963, "learning_rate": 9.466783972705138e-06, "loss": 0.71, "step": 2717 }, { "epoch": 0.17, "grad_norm": 1.530643817396638, "learning_rate": 9.466318102530494e-06, "loss": 0.7518, "step": 2718 }, { "epoch": 0.17, "grad_norm": 1.6199355402491173, "learning_rate": 9.465852040402883e-06, "loss": 0.7869, "step": 2719 }, { "epoch": 0.17, "grad_norm": 1.397326380282496, "learning_rate": 9.465385786342337e-06, "loss": 0.7196, "step": 2720 }, { "epoch": 0.17, "grad_norm": 1.7312456471614164, "learning_rate": 9.46491934036889e-06, "loss": 0.681, "step": 2721 }, { "epoch": 0.17, "grad_norm": 1.6545039008254856, "learning_rate": 9.464452702502595e-06, "loss": 0.7532, "step": 2722 }, { "epoch": 0.17, "grad_norm": 1.4957616421794901, "learning_rate": 9.463985872763501e-06, "loss": 0.7507, "step": 2723 }, { "epoch": 0.17, "grad_norm": 1.9237519468756359, "learning_rate": 9.463518851171678e-06, "loss": 0.7521, "step": 2724 }, { "epoch": 0.17, "grad_norm": 1.4494055799724241, "learning_rate": 9.463051637747191e-06, "loss": 0.7258, "step": 2725 }, { "epoch": 0.17, "grad_norm": 1.576793599102737, "learning_rate": 9.462584232510123e-06, "loss": 0.7481, "step": 2726 }, { "epoch": 0.17, "grad_norm": 1.6637347853792432, "learning_rate": 9.462116635480562e-06, "loss": 0.7816, "step": 2727 }, { "epoch": 0.17, "grad_norm": 1.1917831871257127, "learning_rate": 9.461648846678605e-06, "loss": 0.7018, "step": 2728 }, { "epoch": 0.17, "grad_norm": 1.440620472758117, "learning_rate": 9.461180866124357e-06, "loss": 0.8006, "step": 2729 }, { "epoch": 0.17, "grad_norm": 1.3906915268531275, "learning_rate": 9.460712693837928e-06, "loss": 0.6924, "step": 2730 }, { "epoch": 0.17, "grad_norm": 1.5553342011368019, "learning_rate": 9.46024432983944e-06, "loss": 0.7, "step": 2731 }, { "epoch": 0.17, "grad_norm": 1.6579785392489468, "learning_rate": 9.459775774149024e-06, "loss": 0.8468, "step": 2732 }, { "epoch": 0.17, "grad_norm": 1.9646390523497665, "learning_rate": 9.459307026786817e-06, "loss": 0.7137, "step": 2733 }, { "epoch": 0.17, "grad_norm": 1.496675983205751, "learning_rate": 9.458838087772963e-06, "loss": 0.6703, "step": 2734 }, { "epoch": 0.18, "grad_norm": 1.698464322686784, "learning_rate": 9.458368957127616e-06, "loss": 0.7593, "step": 2735 }, { "epoch": 0.18, "grad_norm": 1.5924020051757368, "learning_rate": 9.457899634870944e-06, "loss": 0.7232, "step": 2736 }, { "epoch": 0.18, "grad_norm": 1.533334113082526, "learning_rate": 9.45743012102311e-06, "loss": 0.7521, "step": 2737 }, { "epoch": 0.18, "grad_norm": 1.532760208199043, "learning_rate": 9.456960415604295e-06, "loss": 0.796, "step": 2738 }, { "epoch": 0.18, "grad_norm": 1.616780968150019, "learning_rate": 9.456490518634688e-06, "loss": 0.7065, "step": 2739 }, { "epoch": 0.18, "grad_norm": 1.578232572501345, "learning_rate": 9.456020430134483e-06, "loss": 0.7201, "step": 2740 }, { "epoch": 0.18, "grad_norm": 1.7317262381995393, "learning_rate": 9.455550150123884e-06, "loss": 0.7283, "step": 2741 }, { "epoch": 0.18, "grad_norm": 1.0259132763532512, "learning_rate": 9.4550796786231e-06, "loss": 0.6408, "step": 2742 }, { "epoch": 0.18, "grad_norm": 1.4335108645074637, "learning_rate": 9.454609015652355e-06, "loss": 0.7159, "step": 2743 }, { "epoch": 0.18, "grad_norm": 1.4422104776390223, "learning_rate": 9.454138161231873e-06, "loss": 0.692, "step": 2744 }, { "epoch": 0.18, "grad_norm": 1.8012158071913227, "learning_rate": 9.453667115381893e-06, "loss": 0.725, "step": 2745 }, { "epoch": 0.18, "grad_norm": 1.3991248069443791, "learning_rate": 9.453195878122659e-06, "loss": 0.7091, "step": 2746 }, { "epoch": 0.18, "grad_norm": 1.5786021490098723, "learning_rate": 9.452724449474423e-06, "loss": 0.6742, "step": 2747 }, { "epoch": 0.18, "grad_norm": 1.46591273988444, "learning_rate": 9.452252829457447e-06, "loss": 0.6758, "step": 2748 }, { "epoch": 0.18, "grad_norm": 1.1472896080690203, "learning_rate": 9.451781018092e-06, "loss": 0.6206, "step": 2749 }, { "epoch": 0.18, "grad_norm": 1.570057149453229, "learning_rate": 9.45130901539836e-06, "loss": 0.7733, "step": 2750 }, { "epoch": 0.18, "grad_norm": 1.955306635309813, "learning_rate": 9.45083682139681e-06, "loss": 0.8576, "step": 2751 }, { "epoch": 0.18, "grad_norm": 1.4971391607328928, "learning_rate": 9.450364436107647e-06, "loss": 0.7108, "step": 2752 }, { "epoch": 0.18, "grad_norm": 1.5266110982201992, "learning_rate": 9.449891859551172e-06, "loss": 0.7445, "step": 2753 }, { "epoch": 0.18, "grad_norm": 1.5856093677507428, "learning_rate": 9.449419091747695e-06, "loss": 0.6798, "step": 2754 }, { "epoch": 0.18, "grad_norm": 1.6243659941526771, "learning_rate": 9.448946132717536e-06, "loss": 0.7634, "step": 2755 }, { "epoch": 0.18, "grad_norm": 1.453657794772818, "learning_rate": 9.448472982481021e-06, "loss": 0.6919, "step": 2756 }, { "epoch": 0.18, "grad_norm": 1.4736375356684586, "learning_rate": 9.447999641058486e-06, "loss": 0.741, "step": 2757 }, { "epoch": 0.18, "grad_norm": 1.125964545722043, "learning_rate": 9.44752610847027e-06, "loss": 0.5665, "step": 2758 }, { "epoch": 0.18, "grad_norm": 1.5171525642211554, "learning_rate": 9.447052384736729e-06, "loss": 0.6984, "step": 2759 }, { "epoch": 0.18, "grad_norm": 1.4907538347443898, "learning_rate": 9.446578469878219e-06, "loss": 0.6734, "step": 2760 }, { "epoch": 0.18, "grad_norm": 1.6823200142075416, "learning_rate": 9.446104363915112e-06, "loss": 0.754, "step": 2761 }, { "epoch": 0.18, "grad_norm": 1.4601801284339928, "learning_rate": 9.445630066867781e-06, "loss": 0.7503, "step": 2762 }, { "epoch": 0.18, "grad_norm": 1.6239669852770267, "learning_rate": 9.445155578756612e-06, "loss": 0.7376, "step": 2763 }, { "epoch": 0.18, "grad_norm": 1.258302664405282, "learning_rate": 9.444680899601997e-06, "loss": 0.7534, "step": 2764 }, { "epoch": 0.18, "grad_norm": 1.809590293019335, "learning_rate": 9.444206029424334e-06, "loss": 0.7037, "step": 2765 }, { "epoch": 0.18, "grad_norm": 1.6885402664611058, "learning_rate": 9.443730968244037e-06, "loss": 0.7148, "step": 2766 }, { "epoch": 0.18, "grad_norm": 1.7176412933536358, "learning_rate": 9.443255716081522e-06, "loss": 0.7238, "step": 2767 }, { "epoch": 0.18, "grad_norm": 1.8802107960843422, "learning_rate": 9.442780272957208e-06, "loss": 0.75, "step": 2768 }, { "epoch": 0.18, "grad_norm": 1.5983640877134175, "learning_rate": 9.442304638891538e-06, "loss": 0.8005, "step": 2769 }, { "epoch": 0.18, "grad_norm": 1.8659843238650802, "learning_rate": 9.441828813904948e-06, "loss": 0.738, "step": 2770 }, { "epoch": 0.18, "grad_norm": 1.6575853509441494, "learning_rate": 9.441352798017886e-06, "loss": 0.8081, "step": 2771 }, { "epoch": 0.18, "grad_norm": 1.5768338955130128, "learning_rate": 9.440876591250817e-06, "loss": 0.7136, "step": 2772 }, { "epoch": 0.18, "grad_norm": 1.923686060550667, "learning_rate": 9.440400193624202e-06, "loss": 0.7879, "step": 2773 }, { "epoch": 0.18, "grad_norm": 1.6914763672466795, "learning_rate": 9.439923605158519e-06, "loss": 0.8049, "step": 2774 }, { "epoch": 0.18, "grad_norm": 1.6604388687101326, "learning_rate": 9.439446825874248e-06, "loss": 0.7247, "step": 2775 }, { "epoch": 0.18, "grad_norm": 1.4771238374993423, "learning_rate": 9.43896985579188e-06, "loss": 0.7698, "step": 2776 }, { "epoch": 0.18, "grad_norm": 1.575852286585767, "learning_rate": 9.438492694931916e-06, "loss": 0.6909, "step": 2777 }, { "epoch": 0.18, "grad_norm": 1.5976384529023022, "learning_rate": 9.438015343314862e-06, "loss": 0.7607, "step": 2778 }, { "epoch": 0.18, "grad_norm": 1.57445704108902, "learning_rate": 9.437537800961235e-06, "loss": 0.7934, "step": 2779 }, { "epoch": 0.18, "grad_norm": 1.66687098699157, "learning_rate": 9.437060067891556e-06, "loss": 0.6854, "step": 2780 }, { "epoch": 0.18, "grad_norm": 1.1912549047632803, "learning_rate": 9.436582144126362e-06, "loss": 0.6935, "step": 2781 }, { "epoch": 0.18, "grad_norm": 1.6270725172586964, "learning_rate": 9.436104029686188e-06, "loss": 0.7048, "step": 2782 }, { "epoch": 0.18, "grad_norm": 1.4838990717630498, "learning_rate": 9.435625724591584e-06, "loss": 0.681, "step": 2783 }, { "epoch": 0.18, "grad_norm": 1.6187506730028645, "learning_rate": 9.435147228863109e-06, "loss": 0.8085, "step": 2784 }, { "epoch": 0.18, "grad_norm": 2.4593380105580143, "learning_rate": 9.434668542521323e-06, "loss": 0.8214, "step": 2785 }, { "epoch": 0.18, "grad_norm": 1.8589914338257376, "learning_rate": 9.434189665586803e-06, "loss": 0.8454, "step": 2786 }, { "epoch": 0.18, "grad_norm": 1.6195157899829855, "learning_rate": 9.433710598080128e-06, "loss": 0.7744, "step": 2787 }, { "epoch": 0.18, "grad_norm": 3.538300031459101, "learning_rate": 9.43323134002189e-06, "loss": 0.6464, "step": 2788 }, { "epoch": 0.18, "grad_norm": 1.2347995555277624, "learning_rate": 9.432751891432681e-06, "loss": 0.6166, "step": 2789 }, { "epoch": 0.18, "grad_norm": 1.2451708263701797, "learning_rate": 9.432272252333112e-06, "loss": 0.6207, "step": 2790 }, { "epoch": 0.18, "grad_norm": 1.705012769057445, "learning_rate": 9.431792422743795e-06, "loss": 0.7585, "step": 2791 }, { "epoch": 0.18, "grad_norm": 1.238666053788798, "learning_rate": 9.431312402685354e-06, "loss": 0.7128, "step": 2792 }, { "epoch": 0.18, "grad_norm": 1.5338909295839234, "learning_rate": 9.430832192178413e-06, "loss": 0.617, "step": 2793 }, { "epoch": 0.18, "grad_norm": 2.8589974288878004, "learning_rate": 9.43035179124362e-06, "loss": 0.7638, "step": 2794 }, { "epoch": 0.18, "grad_norm": 1.7332150343869102, "learning_rate": 9.429871199901614e-06, "loss": 0.8398, "step": 2795 }, { "epoch": 0.18, "grad_norm": 1.7718184645968589, "learning_rate": 9.429390418173052e-06, "loss": 0.7033, "step": 2796 }, { "epoch": 0.18, "grad_norm": 1.4148602332230633, "learning_rate": 9.428909446078597e-06, "loss": 0.6948, "step": 2797 }, { "epoch": 0.18, "grad_norm": 1.716404269429339, "learning_rate": 9.428428283638922e-06, "loss": 0.7308, "step": 2798 }, { "epoch": 0.18, "grad_norm": 1.4617535094223697, "learning_rate": 9.427946930874704e-06, "loss": 0.8205, "step": 2799 }, { "epoch": 0.18, "grad_norm": 1.594653760200635, "learning_rate": 9.42746538780663e-06, "loss": 0.6231, "step": 2800 }, { "epoch": 0.18, "grad_norm": 1.7017906380803436, "learning_rate": 9.426983654455399e-06, "loss": 0.737, "step": 2801 }, { "epoch": 0.18, "grad_norm": 2.265754983609434, "learning_rate": 9.42650173084171e-06, "loss": 0.8092, "step": 2802 }, { "epoch": 0.18, "grad_norm": 1.5766763810493505, "learning_rate": 9.426019616986281e-06, "loss": 0.7551, "step": 2803 }, { "epoch": 0.18, "grad_norm": 1.6683279495136503, "learning_rate": 9.425537312909828e-06, "loss": 0.7607, "step": 2804 }, { "epoch": 0.18, "grad_norm": 1.4358404085983534, "learning_rate": 9.425054818633081e-06, "loss": 0.7305, "step": 2805 }, { "epoch": 0.18, "grad_norm": 1.6483724917625902, "learning_rate": 9.424572134176776e-06, "loss": 0.7547, "step": 2806 }, { "epoch": 0.18, "grad_norm": 1.5904926457921795, "learning_rate": 9.424089259561658e-06, "loss": 0.6824, "step": 2807 }, { "epoch": 0.18, "grad_norm": 1.3822835314632316, "learning_rate": 9.42360619480848e-06, "loss": 0.6601, "step": 2808 }, { "epoch": 0.18, "grad_norm": 2.0081559608085073, "learning_rate": 9.423122939938003e-06, "loss": 0.8664, "step": 2809 }, { "epoch": 0.18, "grad_norm": 1.5851879515072205, "learning_rate": 9.422639494970996e-06, "loss": 0.6733, "step": 2810 }, { "epoch": 0.18, "grad_norm": 1.6268482716062753, "learning_rate": 9.422155859928237e-06, "loss": 0.713, "step": 2811 }, { "epoch": 0.18, "grad_norm": 1.462626275101166, "learning_rate": 9.421672034830511e-06, "loss": 0.7502, "step": 2812 }, { "epoch": 0.18, "grad_norm": 1.4688076130552123, "learning_rate": 9.421188019698613e-06, "loss": 0.7564, "step": 2813 }, { "epoch": 0.18, "grad_norm": 1.607141483903157, "learning_rate": 9.420703814553343e-06, "loss": 0.8059, "step": 2814 }, { "epoch": 0.18, "grad_norm": 1.2406240719375803, "learning_rate": 9.420219419415513e-06, "loss": 0.654, "step": 2815 }, { "epoch": 0.18, "grad_norm": 1.464581919989706, "learning_rate": 9.419734834305942e-06, "loss": 0.7202, "step": 2816 }, { "epoch": 0.18, "grad_norm": 1.6025474027156557, "learning_rate": 9.419250059245453e-06, "loss": 0.7224, "step": 2817 }, { "epoch": 0.18, "grad_norm": 1.625291226899003, "learning_rate": 9.418765094254882e-06, "loss": 0.7668, "step": 2818 }, { "epoch": 0.18, "grad_norm": 1.6436352126576323, "learning_rate": 9.418279939355073e-06, "loss": 0.7717, "step": 2819 }, { "epoch": 0.18, "grad_norm": 1.0595351918873221, "learning_rate": 9.417794594566878e-06, "loss": 0.667, "step": 2820 }, { "epoch": 0.18, "grad_norm": 1.5873876524615878, "learning_rate": 9.417309059911152e-06, "loss": 0.7961, "step": 2821 }, { "epoch": 0.18, "grad_norm": 1.712903814489549, "learning_rate": 9.416823335408766e-06, "loss": 0.7749, "step": 2822 }, { "epoch": 0.18, "grad_norm": 1.6772583458031824, "learning_rate": 9.416337421080594e-06, "loss": 0.7403, "step": 2823 }, { "epoch": 0.18, "grad_norm": 1.5223477705439916, "learning_rate": 9.41585131694752e-06, "loss": 0.7715, "step": 2824 }, { "epoch": 0.18, "grad_norm": 1.5401673183785012, "learning_rate": 9.415365023030433e-06, "loss": 0.6949, "step": 2825 }, { "epoch": 0.18, "grad_norm": 1.843514407516921, "learning_rate": 9.414878539350237e-06, "loss": 0.74, "step": 2826 }, { "epoch": 0.18, "grad_norm": 1.8128235222598759, "learning_rate": 9.41439186592784e-06, "loss": 0.6938, "step": 2827 }, { "epoch": 0.18, "grad_norm": 1.5292266812665998, "learning_rate": 9.413905002784153e-06, "loss": 0.6473, "step": 2828 }, { "epoch": 0.18, "grad_norm": 1.5160752264684603, "learning_rate": 9.413417949940107e-06, "loss": 0.6456, "step": 2829 }, { "epoch": 0.18, "grad_norm": 1.64816592698945, "learning_rate": 9.41293070741663e-06, "loss": 0.7281, "step": 2830 }, { "epoch": 0.18, "grad_norm": 1.521584490923835, "learning_rate": 9.412443275234663e-06, "loss": 0.7458, "step": 2831 }, { "epoch": 0.18, "grad_norm": 1.51277804705111, "learning_rate": 9.411955653415157e-06, "loss": 0.6925, "step": 2832 }, { "epoch": 0.18, "grad_norm": 1.2149858975786265, "learning_rate": 9.411467841979069e-06, "loss": 0.7161, "step": 2833 }, { "epoch": 0.18, "grad_norm": 1.5628762424364155, "learning_rate": 9.41097984094736e-06, "loss": 0.7304, "step": 2834 }, { "epoch": 0.18, "grad_norm": 1.732806951026637, "learning_rate": 9.410491650341009e-06, "loss": 0.7767, "step": 2835 }, { "epoch": 0.18, "grad_norm": 1.5829356507445371, "learning_rate": 9.410003270180992e-06, "loss": 0.7923, "step": 2836 }, { "epoch": 0.18, "grad_norm": 1.548632912959247, "learning_rate": 9.409514700488304e-06, "loss": 0.7401, "step": 2837 }, { "epoch": 0.18, "grad_norm": 1.1323943578152753, "learning_rate": 9.409025941283937e-06, "loss": 0.6427, "step": 2838 }, { "epoch": 0.18, "grad_norm": 1.5364399977057912, "learning_rate": 9.408536992588903e-06, "loss": 0.6364, "step": 2839 }, { "epoch": 0.18, "grad_norm": 1.5246609342337079, "learning_rate": 9.40804785442421e-06, "loss": 0.7673, "step": 2840 }, { "epoch": 0.18, "grad_norm": 1.6170008887634508, "learning_rate": 9.407558526810884e-06, "loss": 0.7996, "step": 2841 }, { "epoch": 0.18, "grad_norm": 1.8003675637662673, "learning_rate": 9.407069009769953e-06, "loss": 0.7035, "step": 2842 }, { "epoch": 0.18, "grad_norm": 1.4193867938495142, "learning_rate": 9.406579303322458e-06, "loss": 0.6825, "step": 2843 }, { "epoch": 0.18, "grad_norm": 1.569590540288521, "learning_rate": 9.406089407489443e-06, "loss": 0.6951, "step": 2844 }, { "epoch": 0.18, "grad_norm": 1.4666785765012302, "learning_rate": 9.405599322291963e-06, "loss": 0.7707, "step": 2845 }, { "epoch": 0.18, "grad_norm": 1.448056750696968, "learning_rate": 9.405109047751083e-06, "loss": 0.6754, "step": 2846 }, { "epoch": 0.18, "grad_norm": 1.6409056352721396, "learning_rate": 9.404618583887872e-06, "loss": 0.7416, "step": 2847 }, { "epoch": 0.18, "grad_norm": 1.6707766186478075, "learning_rate": 9.40412793072341e-06, "loss": 0.8089, "step": 2848 }, { "epoch": 0.18, "grad_norm": 1.1963466140986496, "learning_rate": 9.403637088278784e-06, "loss": 0.6686, "step": 2849 }, { "epoch": 0.18, "grad_norm": 1.7935947627802942, "learning_rate": 9.403146056575088e-06, "loss": 0.771, "step": 2850 }, { "epoch": 0.18, "grad_norm": 1.371583885693036, "learning_rate": 9.402654835633429e-06, "loss": 0.7309, "step": 2851 }, { "epoch": 0.18, "grad_norm": 1.4722166458444392, "learning_rate": 9.402163425474914e-06, "loss": 0.6788, "step": 2852 }, { "epoch": 0.18, "grad_norm": 1.526658845960247, "learning_rate": 9.401671826120667e-06, "loss": 0.7245, "step": 2853 }, { "epoch": 0.18, "grad_norm": 1.1571731754053913, "learning_rate": 9.401180037591813e-06, "loss": 0.7273, "step": 2854 }, { "epoch": 0.18, "grad_norm": 1.4648551464444481, "learning_rate": 9.400688059909489e-06, "loss": 0.7002, "step": 2855 }, { "epoch": 0.18, "grad_norm": 1.5360111717376372, "learning_rate": 9.40019589309484e-06, "loss": 0.6219, "step": 2856 }, { "epoch": 0.18, "grad_norm": 1.6526650930345321, "learning_rate": 9.399703537169017e-06, "loss": 0.6673, "step": 2857 }, { "epoch": 0.18, "grad_norm": 1.5964923324793705, "learning_rate": 9.399210992153181e-06, "loss": 0.7622, "step": 2858 }, { "epoch": 0.18, "grad_norm": 1.698573446860226, "learning_rate": 9.398718258068502e-06, "loss": 0.8507, "step": 2859 }, { "epoch": 0.18, "grad_norm": 1.3862108785961005, "learning_rate": 9.398225334936153e-06, "loss": 0.6258, "step": 2860 }, { "epoch": 0.18, "grad_norm": 1.618362415748395, "learning_rate": 9.397732222777323e-06, "loss": 0.7156, "step": 2861 }, { "epoch": 0.18, "grad_norm": 1.9187899406283146, "learning_rate": 9.397238921613202e-06, "loss": 0.7254, "step": 2862 }, { "epoch": 0.18, "grad_norm": 1.6699595866429016, "learning_rate": 9.396745431464993e-06, "loss": 0.8145, "step": 2863 }, { "epoch": 0.18, "grad_norm": 1.7340543661160528, "learning_rate": 9.396251752353903e-06, "loss": 0.6702, "step": 2864 }, { "epoch": 0.18, "grad_norm": 1.390442957384867, "learning_rate": 9.395757884301152e-06, "loss": 0.7363, "step": 2865 }, { "epoch": 0.18, "grad_norm": 1.4681130350363152, "learning_rate": 9.395263827327963e-06, "loss": 0.7536, "step": 2866 }, { "epoch": 0.18, "grad_norm": 1.6970955521240514, "learning_rate": 9.394769581455569e-06, "loss": 0.7212, "step": 2867 }, { "epoch": 0.18, "grad_norm": 1.4235185595618294, "learning_rate": 9.394275146705214e-06, "loss": 0.7084, "step": 2868 }, { "epoch": 0.18, "grad_norm": 1.5893588516450587, "learning_rate": 9.393780523098148e-06, "loss": 0.8026, "step": 2869 }, { "epoch": 0.18, "grad_norm": 1.5040457281082367, "learning_rate": 9.393285710655626e-06, "loss": 0.7875, "step": 2870 }, { "epoch": 0.18, "grad_norm": 1.3904471039976671, "learning_rate": 9.392790709398916e-06, "loss": 0.729, "step": 2871 }, { "epoch": 0.18, "grad_norm": 1.4576499389788367, "learning_rate": 9.392295519349293e-06, "loss": 0.731, "step": 2872 }, { "epoch": 0.18, "grad_norm": 1.4242355256089874, "learning_rate": 9.391800140528038e-06, "loss": 0.7118, "step": 2873 }, { "epoch": 0.18, "grad_norm": 1.6563725056512477, "learning_rate": 9.391304572956442e-06, "loss": 0.758, "step": 2874 }, { "epoch": 0.18, "grad_norm": 1.075754664699144, "learning_rate": 9.390808816655801e-06, "loss": 0.5279, "step": 2875 }, { "epoch": 0.18, "grad_norm": 1.6142638513882346, "learning_rate": 9.390312871647423e-06, "loss": 0.7262, "step": 2876 }, { "epoch": 0.18, "grad_norm": 2.1957890393213173, "learning_rate": 9.389816737952624e-06, "loss": 0.679, "step": 2877 }, { "epoch": 0.18, "grad_norm": 1.7261495092302266, "learning_rate": 9.389320415592726e-06, "loss": 0.8035, "step": 2878 }, { "epoch": 0.18, "grad_norm": 1.645549444047817, "learning_rate": 9.388823904589062e-06, "loss": 0.7563, "step": 2879 }, { "epoch": 0.18, "grad_norm": 1.009069635819418, "learning_rate": 9.388327204962966e-06, "loss": 0.6964, "step": 2880 }, { "epoch": 0.18, "grad_norm": 1.376632950345545, "learning_rate": 9.387830316735789e-06, "loss": 0.6576, "step": 2881 }, { "epoch": 0.18, "grad_norm": 1.925077382154585, "learning_rate": 9.387333239928883e-06, "loss": 0.7567, "step": 2882 }, { "epoch": 0.18, "grad_norm": 1.372401461071481, "learning_rate": 9.386835974563616e-06, "loss": 0.6657, "step": 2883 }, { "epoch": 0.18, "grad_norm": 1.6669158723946658, "learning_rate": 9.386338520661355e-06, "loss": 0.7396, "step": 2884 }, { "epoch": 0.18, "grad_norm": 1.5742809311952592, "learning_rate": 9.385840878243482e-06, "loss": 0.7556, "step": 2885 }, { "epoch": 0.18, "grad_norm": 1.5141565734844262, "learning_rate": 9.385343047331385e-06, "loss": 0.6903, "step": 2886 }, { "epoch": 0.18, "grad_norm": 1.7552449465948063, "learning_rate": 9.384845027946458e-06, "loss": 0.796, "step": 2887 }, { "epoch": 0.18, "grad_norm": 1.707611323325944, "learning_rate": 9.384346820110107e-06, "loss": 0.7213, "step": 2888 }, { "epoch": 0.18, "grad_norm": 1.984970881554028, "learning_rate": 9.38384842384374e-06, "loss": 0.8207, "step": 2889 }, { "epoch": 0.18, "grad_norm": 1.5709793743435552, "learning_rate": 9.383349839168781e-06, "loss": 0.7665, "step": 2890 }, { "epoch": 0.19, "grad_norm": 1.5879751735224126, "learning_rate": 9.382851066106655e-06, "loss": 0.7307, "step": 2891 }, { "epoch": 0.19, "grad_norm": 1.448920581700988, "learning_rate": 9.3823521046788e-06, "loss": 0.708, "step": 2892 }, { "epoch": 0.19, "grad_norm": 1.612972403311106, "learning_rate": 9.381852954906662e-06, "loss": 0.782, "step": 2893 }, { "epoch": 0.19, "grad_norm": 1.5389692944423656, "learning_rate": 9.38135361681169e-06, "loss": 0.7502, "step": 2894 }, { "epoch": 0.19, "grad_norm": 1.4583265531536376, "learning_rate": 9.380854090415347e-06, "loss": 0.8315, "step": 2895 }, { "epoch": 0.19, "grad_norm": 1.3379883507978, "learning_rate": 9.3803543757391e-06, "loss": 0.6826, "step": 2896 }, { "epoch": 0.19, "grad_norm": 1.6412367595980129, "learning_rate": 9.379854472804425e-06, "loss": 0.7555, "step": 2897 }, { "epoch": 0.19, "grad_norm": 1.4890265247112342, "learning_rate": 9.37935438163281e-06, "loss": 0.7009, "step": 2898 }, { "epoch": 0.19, "grad_norm": 1.2797201557728302, "learning_rate": 9.378854102245747e-06, "loss": 0.6993, "step": 2899 }, { "epoch": 0.19, "grad_norm": 1.5776865672367817, "learning_rate": 9.378353634664735e-06, "loss": 0.7374, "step": 2900 }, { "epoch": 0.19, "grad_norm": 1.5962568852319836, "learning_rate": 9.377852978911282e-06, "loss": 0.7234, "step": 2901 }, { "epoch": 0.19, "grad_norm": 1.0928631851133273, "learning_rate": 9.377352135006909e-06, "loss": 0.6748, "step": 2902 }, { "epoch": 0.19, "grad_norm": 1.421643322888126, "learning_rate": 9.376851102973139e-06, "loss": 0.7032, "step": 2903 }, { "epoch": 0.19, "grad_norm": 1.8425474668071355, "learning_rate": 9.376349882831507e-06, "loss": 0.7698, "step": 2904 }, { "epoch": 0.19, "grad_norm": 1.5634178298927128, "learning_rate": 9.375848474603549e-06, "loss": 0.725, "step": 2905 }, { "epoch": 0.19, "grad_norm": 1.5441639856398506, "learning_rate": 9.375346878310822e-06, "loss": 0.7207, "step": 2906 }, { "epoch": 0.19, "grad_norm": 1.1426545145907545, "learning_rate": 9.374845093974879e-06, "loss": 0.7068, "step": 2907 }, { "epoch": 0.19, "grad_norm": 1.2561915899580929, "learning_rate": 9.374343121617286e-06, "loss": 0.7178, "step": 2908 }, { "epoch": 0.19, "grad_norm": 1.4773238855733648, "learning_rate": 9.373840961259619e-06, "loss": 0.7635, "step": 2909 }, { "epoch": 0.19, "grad_norm": 1.5316512629340873, "learning_rate": 9.373338612923457e-06, "loss": 0.7531, "step": 2910 }, { "epoch": 0.19, "grad_norm": 1.1077271576257242, "learning_rate": 9.372836076630391e-06, "loss": 0.6753, "step": 2911 }, { "epoch": 0.19, "grad_norm": 1.6490837399255254, "learning_rate": 9.372333352402019e-06, "loss": 0.6985, "step": 2912 }, { "epoch": 0.19, "grad_norm": 1.4686130951876823, "learning_rate": 9.371830440259948e-06, "loss": 0.665, "step": 2913 }, { "epoch": 0.19, "grad_norm": 1.4758077109160936, "learning_rate": 9.371327340225791e-06, "loss": 0.7875, "step": 2914 }, { "epoch": 0.19, "grad_norm": 1.4390370739131424, "learning_rate": 9.370824052321168e-06, "loss": 0.6815, "step": 2915 }, { "epoch": 0.19, "grad_norm": 2.01964182531399, "learning_rate": 9.370320576567715e-06, "loss": 0.797, "step": 2916 }, { "epoch": 0.19, "grad_norm": 1.471816021679086, "learning_rate": 9.369816912987066e-06, "loss": 0.6438, "step": 2917 }, { "epoch": 0.19, "grad_norm": 1.750726835413227, "learning_rate": 9.369313061600867e-06, "loss": 0.7881, "step": 2918 }, { "epoch": 0.19, "grad_norm": 1.5136282670476626, "learning_rate": 9.368809022430773e-06, "loss": 0.7111, "step": 2919 }, { "epoch": 0.19, "grad_norm": 1.6745332610932344, "learning_rate": 9.36830479549845e-06, "loss": 0.7048, "step": 2920 }, { "epoch": 0.19, "grad_norm": 1.5260961546672243, "learning_rate": 9.367800380825564e-06, "loss": 0.7044, "step": 2921 }, { "epoch": 0.19, "grad_norm": 2.3598276561637435, "learning_rate": 9.367295778433798e-06, "loss": 0.8043, "step": 2922 }, { "epoch": 0.19, "grad_norm": 1.4344074574842927, "learning_rate": 9.366790988344835e-06, "loss": 0.7562, "step": 2923 }, { "epoch": 0.19, "grad_norm": 1.7188446775319821, "learning_rate": 9.36628601058037e-06, "loss": 0.7001, "step": 2924 }, { "epoch": 0.19, "grad_norm": 1.6583737686896127, "learning_rate": 9.365780845162109e-06, "loss": 0.7883, "step": 2925 }, { "epoch": 0.19, "grad_norm": 1.7375286562723986, "learning_rate": 9.365275492111761e-06, "loss": 0.7221, "step": 2926 }, { "epoch": 0.19, "grad_norm": 1.2177474258910073, "learning_rate": 9.364769951451045e-06, "loss": 0.6372, "step": 2927 }, { "epoch": 0.19, "grad_norm": 1.7876389170261169, "learning_rate": 9.364264223201687e-06, "loss": 0.879, "step": 2928 }, { "epoch": 0.19, "grad_norm": 1.3758096165801315, "learning_rate": 9.363758307385423e-06, "loss": 0.7384, "step": 2929 }, { "epoch": 0.19, "grad_norm": 1.2280723698980514, "learning_rate": 9.363252204023999e-06, "loss": 0.7171, "step": 2930 }, { "epoch": 0.19, "grad_norm": 1.2071429085298564, "learning_rate": 9.36274591313916e-06, "loss": 0.6848, "step": 2931 }, { "epoch": 0.19, "grad_norm": 1.5730975008504287, "learning_rate": 9.36223943475267e-06, "loss": 0.7434, "step": 2932 }, { "epoch": 0.19, "grad_norm": 1.4492122866403103, "learning_rate": 9.361732768886299e-06, "loss": 0.6409, "step": 2933 }, { "epoch": 0.19, "grad_norm": 1.5038617799778988, "learning_rate": 9.361225915561815e-06, "loss": 0.6846, "step": 2934 }, { "epoch": 0.19, "grad_norm": 1.8130349398009575, "learning_rate": 9.360718874801007e-06, "loss": 0.8511, "step": 2935 }, { "epoch": 0.19, "grad_norm": 1.3327422485591367, "learning_rate": 9.360211646625664e-06, "loss": 0.7422, "step": 2936 }, { "epoch": 0.19, "grad_norm": 1.5786987445330942, "learning_rate": 9.359704231057587e-06, "loss": 0.7973, "step": 2937 }, { "epoch": 0.19, "grad_norm": 1.3003640265173024, "learning_rate": 9.359196628118584e-06, "loss": 0.6513, "step": 2938 }, { "epoch": 0.19, "grad_norm": 1.6793769342994915, "learning_rate": 9.358688837830469e-06, "loss": 0.7939, "step": 2939 }, { "epoch": 0.19, "grad_norm": 1.6951856509754186, "learning_rate": 9.358180860215065e-06, "loss": 0.6914, "step": 2940 }, { "epoch": 0.19, "grad_norm": 1.3052492098423618, "learning_rate": 9.357672695294208e-06, "loss": 0.5929, "step": 2941 }, { "epoch": 0.19, "grad_norm": 1.6217161187541742, "learning_rate": 9.357164343089734e-06, "loss": 0.7445, "step": 2942 }, { "epoch": 0.19, "grad_norm": 1.5374201885524241, "learning_rate": 9.356655803623493e-06, "loss": 0.7194, "step": 2943 }, { "epoch": 0.19, "grad_norm": 1.5609624285555952, "learning_rate": 9.35614707691734e-06, "loss": 0.7528, "step": 2944 }, { "epoch": 0.19, "grad_norm": 1.5861226385138656, "learning_rate": 9.355638162993139e-06, "loss": 0.7295, "step": 2945 }, { "epoch": 0.19, "grad_norm": 1.5094806481737686, "learning_rate": 9.355129061872762e-06, "loss": 0.7595, "step": 2946 }, { "epoch": 0.19, "grad_norm": 1.5342321839341264, "learning_rate": 9.354619773578088e-06, "loss": 0.7676, "step": 2947 }, { "epoch": 0.19, "grad_norm": 1.7925433038907599, "learning_rate": 9.35411029813101e-06, "loss": 0.7902, "step": 2948 }, { "epoch": 0.19, "grad_norm": 1.5620898051224497, "learning_rate": 9.353600635553418e-06, "loss": 0.7588, "step": 2949 }, { "epoch": 0.19, "grad_norm": 1.5074780836625508, "learning_rate": 9.353090785867219e-06, "loss": 0.6916, "step": 2950 }, { "epoch": 0.19, "grad_norm": 1.6306684556591384, "learning_rate": 9.352580749094325e-06, "loss": 0.7626, "step": 2951 }, { "epoch": 0.19, "grad_norm": 4.350656357155622, "learning_rate": 9.352070525256656e-06, "loss": 0.7419, "step": 2952 }, { "epoch": 0.19, "grad_norm": 1.4759694837047006, "learning_rate": 9.351560114376142e-06, "loss": 0.7142, "step": 2953 }, { "epoch": 0.19, "grad_norm": 1.4114535213398967, "learning_rate": 9.35104951647472e-06, "loss": 0.6857, "step": 2954 }, { "epoch": 0.19, "grad_norm": 1.438836414508276, "learning_rate": 9.350538731574329e-06, "loss": 0.6601, "step": 2955 }, { "epoch": 0.19, "grad_norm": 1.4655805865528417, "learning_rate": 9.350027759696928e-06, "loss": 0.6734, "step": 2956 }, { "epoch": 0.19, "grad_norm": 1.4445935420680835, "learning_rate": 9.349516600864473e-06, "loss": 0.7045, "step": 2957 }, { "epoch": 0.19, "grad_norm": 1.6081502604282931, "learning_rate": 9.349005255098932e-06, "loss": 0.7634, "step": 2958 }, { "epoch": 0.19, "grad_norm": 0.9868764326205892, "learning_rate": 9.348493722422288e-06, "loss": 0.5901, "step": 2959 }, { "epoch": 0.19, "grad_norm": 1.5311985732590359, "learning_rate": 9.347982002856517e-06, "loss": 0.6623, "step": 2960 }, { "epoch": 0.19, "grad_norm": 1.424493548533667, "learning_rate": 9.347470096423618e-06, "loss": 0.7435, "step": 2961 }, { "epoch": 0.19, "grad_norm": 1.7242653874743707, "learning_rate": 9.34695800314559e-06, "loss": 0.7631, "step": 2962 }, { "epoch": 0.19, "grad_norm": 1.5727893885724402, "learning_rate": 9.346445723044441e-06, "loss": 0.7686, "step": 2963 }, { "epoch": 0.19, "grad_norm": 1.5079565776650885, "learning_rate": 9.345933256142187e-06, "loss": 0.7529, "step": 2964 }, { "epoch": 0.19, "grad_norm": 1.9049257650944436, "learning_rate": 9.345420602460856e-06, "loss": 0.7334, "step": 2965 }, { "epoch": 0.19, "grad_norm": 1.6379226184566622, "learning_rate": 9.344907762022476e-06, "loss": 0.7873, "step": 2966 }, { "epoch": 0.19, "grad_norm": 1.5688380047699892, "learning_rate": 9.344394734849092e-06, "loss": 0.6549, "step": 2967 }, { "epoch": 0.19, "grad_norm": 1.59821390908782, "learning_rate": 9.343881520962749e-06, "loss": 0.6821, "step": 2968 }, { "epoch": 0.19, "grad_norm": 1.4910261147241044, "learning_rate": 9.343368120385508e-06, "loss": 0.7478, "step": 2969 }, { "epoch": 0.19, "grad_norm": 1.6146295219171707, "learning_rate": 9.342854533139431e-06, "loss": 0.7759, "step": 2970 }, { "epoch": 0.19, "grad_norm": 1.539286210389746, "learning_rate": 9.342340759246591e-06, "loss": 0.8376, "step": 2971 }, { "epoch": 0.19, "grad_norm": 1.4516029803797086, "learning_rate": 9.341826798729071e-06, "loss": 0.7675, "step": 2972 }, { "epoch": 0.19, "grad_norm": 1.7238410865928402, "learning_rate": 9.341312651608957e-06, "loss": 0.7888, "step": 2973 }, { "epoch": 0.19, "grad_norm": 1.0980572548237073, "learning_rate": 9.34079831790835e-06, "loss": 0.6278, "step": 2974 }, { "epoch": 0.19, "grad_norm": 1.6074632688654038, "learning_rate": 9.340283797649352e-06, "loss": 0.7911, "step": 2975 }, { "epoch": 0.19, "grad_norm": 1.0950023787908358, "learning_rate": 9.339769090854075e-06, "loss": 0.6327, "step": 2976 }, { "epoch": 0.19, "grad_norm": 1.5779448066959123, "learning_rate": 9.339254197544642e-06, "loss": 0.6733, "step": 2977 }, { "epoch": 0.19, "grad_norm": 1.749119901051545, "learning_rate": 9.338739117743183e-06, "loss": 0.8331, "step": 2978 }, { "epoch": 0.19, "grad_norm": 1.73343793258976, "learning_rate": 9.338223851471833e-06, "loss": 0.7071, "step": 2979 }, { "epoch": 0.19, "grad_norm": 1.5297752564908547, "learning_rate": 9.337708398752738e-06, "loss": 0.7994, "step": 2980 }, { "epoch": 0.19, "grad_norm": 1.6226241673250268, "learning_rate": 9.33719275960805e-06, "loss": 0.7408, "step": 2981 }, { "epoch": 0.19, "grad_norm": 1.4295668287620873, "learning_rate": 9.336676934059932e-06, "loss": 0.7532, "step": 2982 }, { "epoch": 0.19, "grad_norm": 1.3911381081476581, "learning_rate": 9.33616092213055e-06, "loss": 0.6909, "step": 2983 }, { "epoch": 0.19, "grad_norm": 1.8957252971124903, "learning_rate": 9.335644723842086e-06, "loss": 0.8678, "step": 2984 }, { "epoch": 0.19, "grad_norm": 1.2952416744306954, "learning_rate": 9.335128339216719e-06, "loss": 0.6877, "step": 2985 }, { "epoch": 0.19, "grad_norm": 1.5670438631674717, "learning_rate": 9.334611768276648e-06, "loss": 0.7544, "step": 2986 }, { "epoch": 0.19, "grad_norm": 1.5328461439199244, "learning_rate": 9.33409501104407e-06, "loss": 0.7803, "step": 2987 }, { "epoch": 0.19, "grad_norm": 1.612306088626684, "learning_rate": 9.333578067541196e-06, "loss": 0.7585, "step": 2988 }, { "epoch": 0.19, "grad_norm": 1.6590441526409079, "learning_rate": 9.333060937790243e-06, "loss": 0.7124, "step": 2989 }, { "epoch": 0.19, "grad_norm": 1.5761767346241722, "learning_rate": 9.332543621813434e-06, "loss": 0.7015, "step": 2990 }, { "epoch": 0.19, "grad_norm": 1.636497583705717, "learning_rate": 9.332026119633007e-06, "loss": 0.8173, "step": 2991 }, { "epoch": 0.19, "grad_norm": 1.6186503129159495, "learning_rate": 9.3315084312712e-06, "loss": 0.7939, "step": 2992 }, { "epoch": 0.19, "grad_norm": 1.6100497746878772, "learning_rate": 9.33099055675026e-06, "loss": 0.7654, "step": 2993 }, { "epoch": 0.19, "grad_norm": 1.2426355770043083, "learning_rate": 9.330472496092449e-06, "loss": 0.729, "step": 2994 }, { "epoch": 0.19, "grad_norm": 1.811283044020736, "learning_rate": 9.329954249320028e-06, "loss": 0.8154, "step": 2995 }, { "epoch": 0.19, "grad_norm": 1.5553244110378923, "learning_rate": 9.329435816455273e-06, "loss": 0.7653, "step": 2996 }, { "epoch": 0.19, "grad_norm": 1.591618809030736, "learning_rate": 9.328917197520461e-06, "loss": 0.7297, "step": 2997 }, { "epoch": 0.19, "grad_norm": 1.6442639476510645, "learning_rate": 9.328398392537887e-06, "loss": 0.7209, "step": 2998 }, { "epoch": 0.19, "grad_norm": 1.4240214105730982, "learning_rate": 9.327879401529846e-06, "loss": 0.7925, "step": 2999 }, { "epoch": 0.19, "grad_norm": 1.4507560891025064, "learning_rate": 9.327360224518641e-06, "loss": 0.7603, "step": 3000 }, { "epoch": 0.19, "grad_norm": 2.335617882389065, "learning_rate": 9.326840861526587e-06, "loss": 0.7194, "step": 3001 }, { "epoch": 0.19, "grad_norm": 1.8249421850271867, "learning_rate": 9.326321312576006e-06, "loss": 0.7561, "step": 3002 }, { "epoch": 0.19, "grad_norm": 1.615987771141386, "learning_rate": 9.325801577689224e-06, "loss": 0.7395, "step": 3003 }, { "epoch": 0.19, "grad_norm": 1.7743733637919181, "learning_rate": 9.325281656888579e-06, "loss": 0.7449, "step": 3004 }, { "epoch": 0.19, "grad_norm": 1.5601536419492046, "learning_rate": 9.32476155019642e-06, "loss": 0.7519, "step": 3005 }, { "epoch": 0.19, "grad_norm": 1.533974156184609, "learning_rate": 9.324241257635095e-06, "loss": 0.7223, "step": 3006 }, { "epoch": 0.19, "grad_norm": 1.534008108831094, "learning_rate": 9.323720779226969e-06, "loss": 0.7304, "step": 3007 }, { "epoch": 0.19, "grad_norm": 1.4633383074856614, "learning_rate": 9.323200114994407e-06, "loss": 0.7373, "step": 3008 }, { "epoch": 0.19, "grad_norm": 1.4980430436688836, "learning_rate": 9.322679264959789e-06, "loss": 0.7839, "step": 3009 }, { "epoch": 0.19, "grad_norm": 1.480036667679405, "learning_rate": 9.322158229145501e-06, "loss": 0.7135, "step": 3010 }, { "epoch": 0.19, "grad_norm": 1.593085005184277, "learning_rate": 9.321637007573932e-06, "loss": 0.7096, "step": 3011 }, { "epoch": 0.19, "grad_norm": 1.4716320326755612, "learning_rate": 9.321115600267488e-06, "loss": 0.6577, "step": 3012 }, { "epoch": 0.19, "grad_norm": 1.5609823891312902, "learning_rate": 9.320594007248573e-06, "loss": 0.716, "step": 3013 }, { "epoch": 0.19, "grad_norm": 1.5989195319157683, "learning_rate": 9.320072228539607e-06, "loss": 0.7028, "step": 3014 }, { "epoch": 0.19, "grad_norm": 1.3818555187198434, "learning_rate": 9.319550264163015e-06, "loss": 0.6604, "step": 3015 }, { "epoch": 0.19, "grad_norm": 0.9711632689301649, "learning_rate": 9.31902811414123e-06, "loss": 0.5987, "step": 3016 }, { "epoch": 0.19, "grad_norm": 2.044914824351043, "learning_rate": 9.318505778496692e-06, "loss": 0.8372, "step": 3017 }, { "epoch": 0.19, "grad_norm": 1.1973138837462978, "learning_rate": 9.317983257251848e-06, "loss": 0.6614, "step": 3018 }, { "epoch": 0.19, "grad_norm": 1.6899821070008214, "learning_rate": 9.317460550429159e-06, "loss": 0.7643, "step": 3019 }, { "epoch": 0.19, "grad_norm": 1.6590006950726517, "learning_rate": 9.316937658051088e-06, "loss": 0.7821, "step": 3020 }, { "epoch": 0.19, "grad_norm": 1.4523943890302462, "learning_rate": 9.316414580140105e-06, "loss": 0.6905, "step": 3021 }, { "epoch": 0.19, "grad_norm": 1.7504325753877978, "learning_rate": 9.315891316718696e-06, "loss": 0.7394, "step": 3022 }, { "epoch": 0.19, "grad_norm": 2.014463385432239, "learning_rate": 9.315367867809346e-06, "loss": 0.7177, "step": 3023 }, { "epoch": 0.19, "grad_norm": 1.4127503275961488, "learning_rate": 9.314844233434553e-06, "loss": 0.7095, "step": 3024 }, { "epoch": 0.19, "grad_norm": 1.6571353057672547, "learning_rate": 9.314320413616821e-06, "loss": 0.7369, "step": 3025 }, { "epoch": 0.19, "grad_norm": 1.5055048344688011, "learning_rate": 9.313796408378666e-06, "loss": 0.6669, "step": 3026 }, { "epoch": 0.19, "grad_norm": 1.5054695768035018, "learning_rate": 9.313272217742604e-06, "loss": 0.6801, "step": 3027 }, { "epoch": 0.19, "grad_norm": 1.704789167961506, "learning_rate": 9.312747841731165e-06, "loss": 0.8101, "step": 3028 }, { "epoch": 0.19, "grad_norm": 1.7669388722334505, "learning_rate": 9.312223280366886e-06, "loss": 0.7735, "step": 3029 }, { "epoch": 0.19, "grad_norm": 1.733940807629975, "learning_rate": 9.311698533672313e-06, "loss": 0.7518, "step": 3030 }, { "epoch": 0.19, "grad_norm": 1.495396843994434, "learning_rate": 9.311173601669996e-06, "loss": 0.8409, "step": 3031 }, { "epoch": 0.19, "grad_norm": 1.6968346725891343, "learning_rate": 9.310648484382496e-06, "loss": 0.7144, "step": 3032 }, { "epoch": 0.19, "grad_norm": 1.7029310389629086, "learning_rate": 9.310123181832382e-06, "loss": 0.766, "step": 3033 }, { "epoch": 0.19, "grad_norm": 1.591709283829281, "learning_rate": 9.309597694042232e-06, "loss": 0.7644, "step": 3034 }, { "epoch": 0.19, "grad_norm": 1.6196952999252168, "learning_rate": 9.309072021034629e-06, "loss": 0.709, "step": 3035 }, { "epoch": 0.19, "grad_norm": 1.6468679744414843, "learning_rate": 9.308546162832164e-06, "loss": 0.6856, "step": 3036 }, { "epoch": 0.19, "grad_norm": 1.4898600539639297, "learning_rate": 9.308020119457439e-06, "loss": 0.7121, "step": 3037 }, { "epoch": 0.19, "grad_norm": 1.4774486947610048, "learning_rate": 9.307493890933062e-06, "loss": 0.6677, "step": 3038 }, { "epoch": 0.19, "grad_norm": 1.4849900073065658, "learning_rate": 9.306967477281648e-06, "loss": 0.7172, "step": 3039 }, { "epoch": 0.19, "grad_norm": 1.5836012659152845, "learning_rate": 9.306440878525823e-06, "loss": 0.7461, "step": 3040 }, { "epoch": 0.19, "grad_norm": 1.4818616284805521, "learning_rate": 9.305914094688216e-06, "loss": 0.738, "step": 3041 }, { "epoch": 0.19, "grad_norm": 1.6596004054778974, "learning_rate": 9.305387125791472e-06, "loss": 0.8535, "step": 3042 }, { "epoch": 0.19, "grad_norm": 1.822037056528415, "learning_rate": 9.304859971858233e-06, "loss": 0.7721, "step": 3043 }, { "epoch": 0.19, "grad_norm": 1.715785198021151, "learning_rate": 9.304332632911162e-06, "loss": 0.6688, "step": 3044 }, { "epoch": 0.19, "grad_norm": 1.7546200906378164, "learning_rate": 9.303805108972916e-06, "loss": 0.6988, "step": 3045 }, { "epoch": 0.19, "grad_norm": 1.644332170349742, "learning_rate": 9.30327740006617e-06, "loss": 0.7652, "step": 3046 }, { "epoch": 0.2, "grad_norm": 1.7521639096345514, "learning_rate": 9.302749506213607e-06, "loss": 0.8062, "step": 3047 }, { "epoch": 0.2, "grad_norm": 1.7510203561641595, "learning_rate": 9.302221427437907e-06, "loss": 0.7583, "step": 3048 }, { "epoch": 0.2, "grad_norm": 2.2415513743245232, "learning_rate": 9.301693163761774e-06, "loss": 0.721, "step": 3049 }, { "epoch": 0.2, "grad_norm": 1.8379558888334724, "learning_rate": 9.301164715207907e-06, "loss": 0.7557, "step": 3050 }, { "epoch": 0.2, "grad_norm": 1.8105893821748293, "learning_rate": 9.300636081799017e-06, "loss": 0.7765, "step": 3051 }, { "epoch": 0.2, "grad_norm": 1.5453000602983709, "learning_rate": 9.300107263557827e-06, "loss": 0.7723, "step": 3052 }, { "epoch": 0.2, "grad_norm": 1.6851209433313938, "learning_rate": 9.299578260507061e-06, "loss": 0.8492, "step": 3053 }, { "epoch": 0.2, "grad_norm": 1.4346438881300945, "learning_rate": 9.299049072669458e-06, "loss": 0.7426, "step": 3054 }, { "epoch": 0.2, "grad_norm": 1.6604740364084094, "learning_rate": 9.298519700067757e-06, "loss": 0.747, "step": 3055 }, { "epoch": 0.2, "grad_norm": 1.6379882826197862, "learning_rate": 9.297990142724714e-06, "loss": 0.7685, "step": 3056 }, { "epoch": 0.2, "grad_norm": 1.5406042561821538, "learning_rate": 9.297460400663083e-06, "loss": 0.723, "step": 3057 }, { "epoch": 0.2, "grad_norm": 1.5656817769822324, "learning_rate": 9.296930473905636e-06, "loss": 0.7685, "step": 3058 }, { "epoch": 0.2, "grad_norm": 1.2928681024990887, "learning_rate": 9.296400362475149e-06, "loss": 0.6916, "step": 3059 }, { "epoch": 0.2, "grad_norm": 1.600881859686191, "learning_rate": 9.2958700663944e-06, "loss": 0.7629, "step": 3060 }, { "epoch": 0.2, "grad_norm": 1.618004613006446, "learning_rate": 9.29533958568618e-06, "loss": 0.6946, "step": 3061 }, { "epoch": 0.2, "grad_norm": 1.6879958169858447, "learning_rate": 9.294808920373293e-06, "loss": 0.7334, "step": 3062 }, { "epoch": 0.2, "grad_norm": 1.7155305540170631, "learning_rate": 9.294278070478544e-06, "loss": 0.8428, "step": 3063 }, { "epoch": 0.2, "grad_norm": 1.7863728629173572, "learning_rate": 9.293747036024746e-06, "loss": 0.7544, "step": 3064 }, { "epoch": 0.2, "grad_norm": 1.1963933873147967, "learning_rate": 9.293215817034722e-06, "loss": 0.7494, "step": 3065 }, { "epoch": 0.2, "grad_norm": 1.6783554499878857, "learning_rate": 9.292684413531305e-06, "loss": 0.7758, "step": 3066 }, { "epoch": 0.2, "grad_norm": 1.5090492255695849, "learning_rate": 9.29215282553733e-06, "loss": 0.7639, "step": 3067 }, { "epoch": 0.2, "grad_norm": 1.4893359449103354, "learning_rate": 9.291621053075647e-06, "loss": 0.7095, "step": 3068 }, { "epoch": 0.2, "grad_norm": 1.584418329084102, "learning_rate": 9.291089096169109e-06, "loss": 0.6757, "step": 3069 }, { "epoch": 0.2, "grad_norm": 1.440284134686865, "learning_rate": 9.29055695484058e-06, "loss": 0.6921, "step": 3070 }, { "epoch": 0.2, "grad_norm": 1.6788069821045322, "learning_rate": 9.290024629112927e-06, "loss": 0.7487, "step": 3071 }, { "epoch": 0.2, "grad_norm": 1.6876699014842318, "learning_rate": 9.28949211900903e-06, "loss": 0.6863, "step": 3072 }, { "epoch": 0.2, "grad_norm": 1.5298381266911107, "learning_rate": 9.288959424551775e-06, "loss": 0.6726, "step": 3073 }, { "epoch": 0.2, "grad_norm": 1.605142999539809, "learning_rate": 9.288426545764055e-06, "loss": 0.6804, "step": 3074 }, { "epoch": 0.2, "grad_norm": 1.7644172822103454, "learning_rate": 9.287893482668774e-06, "loss": 0.7626, "step": 3075 }, { "epoch": 0.2, "grad_norm": 1.7792773760198162, "learning_rate": 9.287360235288843e-06, "loss": 0.6781, "step": 3076 }, { "epoch": 0.2, "grad_norm": 1.6549093132986392, "learning_rate": 9.286826803647178e-06, "loss": 0.7749, "step": 3077 }, { "epoch": 0.2, "grad_norm": 1.5318758988776129, "learning_rate": 9.286293187766703e-06, "loss": 0.7314, "step": 3078 }, { "epoch": 0.2, "grad_norm": 1.7026589844655928, "learning_rate": 9.285759387670357e-06, "loss": 0.743, "step": 3079 }, { "epoch": 0.2, "grad_norm": 1.3802944019286216, "learning_rate": 9.285225403381075e-06, "loss": 0.6563, "step": 3080 }, { "epoch": 0.2, "grad_norm": 1.7144116986471778, "learning_rate": 9.28469123492181e-06, "loss": 0.7406, "step": 3081 }, { "epoch": 0.2, "grad_norm": 1.5372072100328578, "learning_rate": 9.28415688231552e-06, "loss": 0.6809, "step": 3082 }, { "epoch": 0.2, "grad_norm": 1.7551587727799087, "learning_rate": 9.283622345585169e-06, "loss": 0.7897, "step": 3083 }, { "epoch": 0.2, "grad_norm": 1.6202303457704528, "learning_rate": 9.28308762475373e-06, "loss": 0.7774, "step": 3084 }, { "epoch": 0.2, "grad_norm": 1.6422058725179955, "learning_rate": 9.282552719844186e-06, "loss": 0.7764, "step": 3085 }, { "epoch": 0.2, "grad_norm": 1.6772175206553126, "learning_rate": 9.282017630879524e-06, "loss": 0.8102, "step": 3086 }, { "epoch": 0.2, "grad_norm": 1.661938409925814, "learning_rate": 9.281482357882743e-06, "loss": 0.7051, "step": 3087 }, { "epoch": 0.2, "grad_norm": 1.7398927810590923, "learning_rate": 9.280946900876847e-06, "loss": 0.7586, "step": 3088 }, { "epoch": 0.2, "grad_norm": 1.6001362463329216, "learning_rate": 9.280411259884847e-06, "loss": 0.8634, "step": 3089 }, { "epoch": 0.2, "grad_norm": 1.5276406801662297, "learning_rate": 9.279875434929767e-06, "loss": 0.6954, "step": 3090 }, { "epoch": 0.2, "grad_norm": 1.5786677556177975, "learning_rate": 9.279339426034634e-06, "loss": 0.7505, "step": 3091 }, { "epoch": 0.2, "grad_norm": 1.4329162201048358, "learning_rate": 9.278803233222484e-06, "loss": 0.7783, "step": 3092 }, { "epoch": 0.2, "grad_norm": 1.7149390724414597, "learning_rate": 9.278266856516363e-06, "loss": 0.8756, "step": 3093 }, { "epoch": 0.2, "grad_norm": 1.4877023035885864, "learning_rate": 9.277730295939322e-06, "loss": 0.7664, "step": 3094 }, { "epoch": 0.2, "grad_norm": 1.6090848665980404, "learning_rate": 9.27719355151442e-06, "loss": 0.818, "step": 3095 }, { "epoch": 0.2, "grad_norm": 1.637254521881606, "learning_rate": 9.276656623264729e-06, "loss": 0.8199, "step": 3096 }, { "epoch": 0.2, "grad_norm": 1.6579152821984666, "learning_rate": 9.276119511213321e-06, "loss": 0.8173, "step": 3097 }, { "epoch": 0.2, "grad_norm": 1.5249865801098277, "learning_rate": 9.275582215383283e-06, "loss": 0.6861, "step": 3098 }, { "epoch": 0.2, "grad_norm": 2.135925360081104, "learning_rate": 9.275044735797705e-06, "loss": 0.6752, "step": 3099 }, { "epoch": 0.2, "grad_norm": 1.5786104731705763, "learning_rate": 9.274507072479688e-06, "loss": 0.6719, "step": 3100 }, { "epoch": 0.2, "grad_norm": 1.443925176416499, "learning_rate": 9.27396922545234e-06, "loss": 0.6797, "step": 3101 }, { "epoch": 0.2, "grad_norm": 1.695083939368986, "learning_rate": 9.273431194738773e-06, "loss": 0.6671, "step": 3102 }, { "epoch": 0.2, "grad_norm": 1.7558493431803415, "learning_rate": 9.272892980362113e-06, "loss": 0.7902, "step": 3103 }, { "epoch": 0.2, "grad_norm": 1.238552296177663, "learning_rate": 9.272354582345492e-06, "loss": 0.6798, "step": 3104 }, { "epoch": 0.2, "grad_norm": 1.4657317184355818, "learning_rate": 9.27181600071205e-06, "loss": 0.6903, "step": 3105 }, { "epoch": 0.2, "grad_norm": 1.5614282708378995, "learning_rate": 9.271277235484932e-06, "loss": 0.7085, "step": 3106 }, { "epoch": 0.2, "grad_norm": 1.5059363580819467, "learning_rate": 9.270738286687293e-06, "loss": 0.716, "step": 3107 }, { "epoch": 0.2, "grad_norm": 1.6237639987733323, "learning_rate": 9.270199154342297e-06, "loss": 0.7313, "step": 3108 }, { "epoch": 0.2, "grad_norm": 1.6917620828425404, "learning_rate": 9.269659838473113e-06, "loss": 0.7304, "step": 3109 }, { "epoch": 0.2, "grad_norm": 1.6387217457980585, "learning_rate": 9.269120339102922e-06, "loss": 0.7101, "step": 3110 }, { "epoch": 0.2, "grad_norm": 1.1848004084041168, "learning_rate": 9.26858065625491e-06, "loss": 0.7139, "step": 3111 }, { "epoch": 0.2, "grad_norm": 1.834583925085255, "learning_rate": 9.26804078995227e-06, "loss": 0.741, "step": 3112 }, { "epoch": 0.2, "grad_norm": 1.683802561871295, "learning_rate": 9.267500740218205e-06, "loss": 0.7331, "step": 3113 }, { "epoch": 0.2, "grad_norm": 1.4333751592340003, "learning_rate": 9.266960507075927e-06, "loss": 0.7411, "step": 3114 }, { "epoch": 0.2, "grad_norm": 1.5370685971967537, "learning_rate": 9.266420090548652e-06, "loss": 0.7459, "step": 3115 }, { "epoch": 0.2, "grad_norm": 1.2158547216398763, "learning_rate": 9.265879490659606e-06, "loss": 0.6663, "step": 3116 }, { "epoch": 0.2, "grad_norm": 1.3299727055632529, "learning_rate": 9.265338707432024e-06, "loss": 0.6793, "step": 3117 }, { "epoch": 0.2, "grad_norm": 1.4530539798733915, "learning_rate": 9.264797740889146e-06, "loss": 0.6659, "step": 3118 }, { "epoch": 0.2, "grad_norm": 1.8188209220990335, "learning_rate": 9.264256591054222e-06, "loss": 0.6911, "step": 3119 }, { "epoch": 0.2, "grad_norm": 1.593301875926419, "learning_rate": 9.263715257950511e-06, "loss": 0.7407, "step": 3120 }, { "epoch": 0.2, "grad_norm": 1.562822848871447, "learning_rate": 9.263173741601278e-06, "loss": 0.8345, "step": 3121 }, { "epoch": 0.2, "grad_norm": 1.7239532112335278, "learning_rate": 9.262632042029796e-06, "loss": 0.6211, "step": 3122 }, { "epoch": 0.2, "grad_norm": 1.6366388570832509, "learning_rate": 9.262090159259345e-06, "loss": 0.6942, "step": 3123 }, { "epoch": 0.2, "grad_norm": 1.5528313629242827, "learning_rate": 9.261548093313216e-06, "loss": 0.7743, "step": 3124 }, { "epoch": 0.2, "grad_norm": 1.78448015607614, "learning_rate": 9.261005844214703e-06, "loss": 0.6513, "step": 3125 }, { "epoch": 0.2, "grad_norm": 1.4464811858697306, "learning_rate": 9.260463411987113e-06, "loss": 0.6915, "step": 3126 }, { "epoch": 0.2, "grad_norm": 1.5956503681182037, "learning_rate": 9.25992079665376e-06, "loss": 0.7533, "step": 3127 }, { "epoch": 0.2, "grad_norm": 1.0998071657078028, "learning_rate": 9.25937799823796e-06, "loss": 0.8016, "step": 3128 }, { "epoch": 0.2, "grad_norm": 1.1438793883810625, "learning_rate": 9.258835016763044e-06, "loss": 0.6171, "step": 3129 }, { "epoch": 0.2, "grad_norm": 1.266374566094136, "learning_rate": 9.258291852252349e-06, "loss": 0.6242, "step": 3130 }, { "epoch": 0.2, "grad_norm": 1.6354477110283656, "learning_rate": 9.257748504729218e-06, "loss": 0.7944, "step": 3131 }, { "epoch": 0.2, "grad_norm": 1.57764626771954, "learning_rate": 9.257204974217002e-06, "loss": 0.6894, "step": 3132 }, { "epoch": 0.2, "grad_norm": 1.9386396711001401, "learning_rate": 9.256661260739065e-06, "loss": 0.6427, "step": 3133 }, { "epoch": 0.2, "grad_norm": 1.3094604131467185, "learning_rate": 9.256117364318768e-06, "loss": 0.6914, "step": 3134 }, { "epoch": 0.2, "grad_norm": 1.1981282307572114, "learning_rate": 9.255573284979491e-06, "loss": 0.7057, "step": 3135 }, { "epoch": 0.2, "grad_norm": 3.0854988835430825, "learning_rate": 9.255029022744615e-06, "loss": 0.7894, "step": 3136 }, { "epoch": 0.2, "grad_norm": 1.6790469457503958, "learning_rate": 9.254484577637534e-06, "loss": 0.8029, "step": 3137 }, { "epoch": 0.2, "grad_norm": 1.5748392249366758, "learning_rate": 9.253939949681648e-06, "loss": 0.7274, "step": 3138 }, { "epoch": 0.2, "grad_norm": 1.5468600569115327, "learning_rate": 9.253395138900359e-06, "loss": 0.8141, "step": 3139 }, { "epoch": 0.2, "grad_norm": 1.8685441084207215, "learning_rate": 9.252850145317085e-06, "loss": 0.7789, "step": 3140 }, { "epoch": 0.2, "grad_norm": 1.4640341664241754, "learning_rate": 9.252304968955248e-06, "loss": 0.7326, "step": 3141 }, { "epoch": 0.2, "grad_norm": 1.605006048878855, "learning_rate": 9.251759609838278e-06, "loss": 0.7621, "step": 3142 }, { "epoch": 0.2, "grad_norm": 1.4657816906037888, "learning_rate": 9.251214067989617e-06, "loss": 0.7305, "step": 3143 }, { "epoch": 0.2, "grad_norm": 1.1148867841272745, "learning_rate": 9.250668343432707e-06, "loss": 0.6568, "step": 3144 }, { "epoch": 0.2, "grad_norm": 1.7199223359877276, "learning_rate": 9.250122436191002e-06, "loss": 0.6412, "step": 3145 }, { "epoch": 0.2, "grad_norm": 1.506182828246, "learning_rate": 9.249576346287967e-06, "loss": 0.7088, "step": 3146 }, { "epoch": 0.2, "grad_norm": 1.179202060024002, "learning_rate": 9.249030073747069e-06, "loss": 0.6519, "step": 3147 }, { "epoch": 0.2, "grad_norm": 1.5774173202858996, "learning_rate": 9.248483618591788e-06, "loss": 0.719, "step": 3148 }, { "epoch": 0.2, "grad_norm": 1.6049630853867696, "learning_rate": 9.247936980845608e-06, "loss": 0.7678, "step": 3149 }, { "epoch": 0.2, "grad_norm": 1.487076102475727, "learning_rate": 9.247390160532024e-06, "loss": 0.7106, "step": 3150 }, { "epoch": 0.2, "grad_norm": 1.6187979379919648, "learning_rate": 9.246843157674535e-06, "loss": 0.7849, "step": 3151 }, { "epoch": 0.2, "grad_norm": 1.5300267885005723, "learning_rate": 9.246295972296651e-06, "loss": 0.7734, "step": 3152 }, { "epoch": 0.2, "grad_norm": 1.495576812531809, "learning_rate": 9.245748604421888e-06, "loss": 0.6886, "step": 3153 }, { "epoch": 0.2, "grad_norm": 1.542614912077344, "learning_rate": 9.245201054073772e-06, "loss": 0.7305, "step": 3154 }, { "epoch": 0.2, "grad_norm": 1.3703804382206168, "learning_rate": 9.244653321275834e-06, "loss": 0.7951, "step": 3155 }, { "epoch": 0.2, "grad_norm": 1.9046206369148373, "learning_rate": 9.244105406051617e-06, "loss": 0.8012, "step": 3156 }, { "epoch": 0.2, "grad_norm": 1.068977640811144, "learning_rate": 9.243557308424667e-06, "loss": 0.6523, "step": 3157 }, { "epoch": 0.2, "grad_norm": 1.7652126051341113, "learning_rate": 9.243009028418542e-06, "loss": 0.7684, "step": 3158 }, { "epoch": 0.2, "grad_norm": 1.5154467588140126, "learning_rate": 9.242460566056803e-06, "loss": 0.7419, "step": 3159 }, { "epoch": 0.2, "grad_norm": 1.8507464750086131, "learning_rate": 9.241911921363025e-06, "loss": 0.7667, "step": 3160 }, { "epoch": 0.2, "grad_norm": 1.6483428680023253, "learning_rate": 9.241363094360785e-06, "loss": 0.8222, "step": 3161 }, { "epoch": 0.2, "grad_norm": 1.1339092182736967, "learning_rate": 9.240814085073673e-06, "loss": 0.6931, "step": 3162 }, { "epoch": 0.2, "grad_norm": 1.0374332136070188, "learning_rate": 9.240264893525281e-06, "loss": 0.604, "step": 3163 }, { "epoch": 0.2, "grad_norm": 1.5224378193706687, "learning_rate": 9.239715519739215e-06, "loss": 0.7439, "step": 3164 }, { "epoch": 0.2, "grad_norm": 1.491655444886776, "learning_rate": 9.239165963739084e-06, "loss": 0.6902, "step": 3165 }, { "epoch": 0.2, "grad_norm": 1.495888371902572, "learning_rate": 9.23861622554851e-06, "loss": 0.7258, "step": 3166 }, { "epoch": 0.2, "grad_norm": 1.640912841947437, "learning_rate": 9.238066305191113e-06, "loss": 0.7341, "step": 3167 }, { "epoch": 0.2, "grad_norm": 1.8075101944313365, "learning_rate": 9.237516202690535e-06, "loss": 0.8379, "step": 3168 }, { "epoch": 0.2, "grad_norm": 1.1203856453049539, "learning_rate": 9.236965918070413e-06, "loss": 0.6707, "step": 3169 }, { "epoch": 0.2, "grad_norm": 1.8802727401064305, "learning_rate": 9.2364154513544e-06, "loss": 0.7119, "step": 3170 }, { "epoch": 0.2, "grad_norm": 1.6251714957519048, "learning_rate": 9.235864802566153e-06, "loss": 0.787, "step": 3171 }, { "epoch": 0.2, "grad_norm": 1.5669869713759192, "learning_rate": 9.235313971729338e-06, "loss": 0.7439, "step": 3172 }, { "epoch": 0.2, "grad_norm": 1.5246921701837912, "learning_rate": 9.234762958867627e-06, "loss": 0.7144, "step": 3173 }, { "epoch": 0.2, "grad_norm": 1.4777122824881856, "learning_rate": 9.234211764004701e-06, "loss": 0.7598, "step": 3174 }, { "epoch": 0.2, "grad_norm": 1.5467991351682904, "learning_rate": 9.233660387164254e-06, "loss": 0.7841, "step": 3175 }, { "epoch": 0.2, "grad_norm": 1.536932798677084, "learning_rate": 9.233108828369977e-06, "loss": 0.6886, "step": 3176 }, { "epoch": 0.2, "grad_norm": 1.4834244182876162, "learning_rate": 9.23255708764558e-06, "loss": 0.6971, "step": 3177 }, { "epoch": 0.2, "grad_norm": 1.242019503385302, "learning_rate": 9.23200516501477e-06, "loss": 0.6912, "step": 3178 }, { "epoch": 0.2, "grad_norm": 1.6400565883952152, "learning_rate": 9.231453060501274e-06, "loss": 0.8248, "step": 3179 }, { "epoch": 0.2, "grad_norm": 1.6455175440141898, "learning_rate": 9.230900774128815e-06, "loss": 0.8453, "step": 3180 }, { "epoch": 0.2, "grad_norm": 1.4812139008349035, "learning_rate": 9.230348305921131e-06, "loss": 0.7744, "step": 3181 }, { "epoch": 0.2, "grad_norm": 1.4856211637970682, "learning_rate": 9.229795655901967e-06, "loss": 0.7034, "step": 3182 }, { "epoch": 0.2, "grad_norm": 1.6559704143246026, "learning_rate": 9.229242824095074e-06, "loss": 0.8229, "step": 3183 }, { "epoch": 0.2, "grad_norm": 1.4001059427727227, "learning_rate": 9.22868981052421e-06, "loss": 0.7068, "step": 3184 }, { "epoch": 0.2, "grad_norm": 1.5699481409803189, "learning_rate": 9.228136615213144e-06, "loss": 0.6983, "step": 3185 }, { "epoch": 0.2, "grad_norm": 1.6243548868487783, "learning_rate": 9.227583238185651e-06, "loss": 0.715, "step": 3186 }, { "epoch": 0.2, "grad_norm": 1.6106071874645573, "learning_rate": 9.227029679465515e-06, "loss": 0.715, "step": 3187 }, { "epoch": 0.2, "grad_norm": 1.5535434865362807, "learning_rate": 9.226475939076525e-06, "loss": 0.7216, "step": 3188 }, { "epoch": 0.2, "grad_norm": 1.513303351859624, "learning_rate": 9.225922017042478e-06, "loss": 0.7274, "step": 3189 }, { "epoch": 0.2, "grad_norm": 1.569139020810828, "learning_rate": 9.225367913387184e-06, "loss": 0.8547, "step": 3190 }, { "epoch": 0.2, "grad_norm": 2.368697939821245, "learning_rate": 9.224813628134457e-06, "loss": 0.6799, "step": 3191 }, { "epoch": 0.2, "grad_norm": 1.5447658462623866, "learning_rate": 9.224259161308116e-06, "loss": 0.7623, "step": 3192 }, { "epoch": 0.2, "grad_norm": 1.504163091739907, "learning_rate": 9.223704512931994e-06, "loss": 0.7013, "step": 3193 }, { "epoch": 0.2, "grad_norm": 1.4464226923879508, "learning_rate": 9.223149683029927e-06, "loss": 0.6825, "step": 3194 }, { "epoch": 0.2, "grad_norm": 1.326195248449625, "learning_rate": 9.22259467162576e-06, "loss": 0.6971, "step": 3195 }, { "epoch": 0.2, "grad_norm": 1.5459284207858186, "learning_rate": 9.222039478743348e-06, "loss": 0.8673, "step": 3196 }, { "epoch": 0.2, "grad_norm": 1.5486892811920772, "learning_rate": 9.221484104406551e-06, "loss": 0.8442, "step": 3197 }, { "epoch": 0.2, "grad_norm": 1.5875261376552061, "learning_rate": 9.220928548639237e-06, "loss": 0.8091, "step": 3198 }, { "epoch": 0.2, "grad_norm": 1.5102632722133738, "learning_rate": 9.220372811465282e-06, "loss": 0.7313, "step": 3199 }, { "epoch": 0.2, "grad_norm": 1.7541356868131843, "learning_rate": 9.219816892908573e-06, "loss": 0.7392, "step": 3200 }, { "epoch": 0.2, "grad_norm": 1.4775534508974204, "learning_rate": 9.219260792993001e-06, "loss": 0.6905, "step": 3201 }, { "epoch": 0.2, "grad_norm": 1.1957147950961393, "learning_rate": 9.218704511742466e-06, "loss": 0.6506, "step": 3202 }, { "epoch": 0.21, "grad_norm": 1.7003768126509324, "learning_rate": 9.218148049180878e-06, "loss": 0.7702, "step": 3203 }, { "epoch": 0.21, "grad_norm": 1.6504636130137949, "learning_rate": 9.217591405332147e-06, "loss": 0.6976, "step": 3204 }, { "epoch": 0.21, "grad_norm": 1.4311318016732515, "learning_rate": 9.217034580220203e-06, "loss": 0.7079, "step": 3205 }, { "epoch": 0.21, "grad_norm": 1.9353846465782811, "learning_rate": 9.216477573868972e-06, "loss": 0.6075, "step": 3206 }, { "epoch": 0.21, "grad_norm": 1.737107912849892, "learning_rate": 9.215920386302393e-06, "loss": 0.7485, "step": 3207 }, { "epoch": 0.21, "grad_norm": 1.481722020313972, "learning_rate": 9.21536301754442e-06, "loss": 0.657, "step": 3208 }, { "epoch": 0.21, "grad_norm": 1.588164500767868, "learning_rate": 9.214805467618996e-06, "loss": 0.7126, "step": 3209 }, { "epoch": 0.21, "grad_norm": 1.4784480294837399, "learning_rate": 9.214247736550092e-06, "loss": 0.6938, "step": 3210 }, { "epoch": 0.21, "grad_norm": 1.6796728814380983, "learning_rate": 9.213689824361678e-06, "loss": 0.7902, "step": 3211 }, { "epoch": 0.21, "grad_norm": 1.5563003795276007, "learning_rate": 9.213131731077726e-06, "loss": 0.7301, "step": 3212 }, { "epoch": 0.21, "grad_norm": 1.2627484196865062, "learning_rate": 9.212573456722227e-06, "loss": 0.6378, "step": 3213 }, { "epoch": 0.21, "grad_norm": 1.8132920062285522, "learning_rate": 9.212015001319173e-06, "loss": 0.7856, "step": 3214 }, { "epoch": 0.21, "grad_norm": 1.5183926439593833, "learning_rate": 9.211456364892566e-06, "loss": 0.6949, "step": 3215 }, { "epoch": 0.21, "grad_norm": 1.6188609404646477, "learning_rate": 9.21089754746641e-06, "loss": 0.6391, "step": 3216 }, { "epoch": 0.21, "grad_norm": 1.7864341574766698, "learning_rate": 9.210338549064728e-06, "loss": 0.7444, "step": 3217 }, { "epoch": 0.21, "grad_norm": 1.485314820008313, "learning_rate": 9.209779369711544e-06, "loss": 0.7424, "step": 3218 }, { "epoch": 0.21, "grad_norm": 1.72843147346921, "learning_rate": 9.209220009430886e-06, "loss": 0.8085, "step": 3219 }, { "epoch": 0.21, "grad_norm": 1.524747793192942, "learning_rate": 9.208660468246799e-06, "loss": 0.8201, "step": 3220 }, { "epoch": 0.21, "grad_norm": 1.8821993932735825, "learning_rate": 9.208100746183327e-06, "loss": 0.7561, "step": 3221 }, { "epoch": 0.21, "grad_norm": 1.8287554882024066, "learning_rate": 9.207540843264527e-06, "loss": 0.7108, "step": 3222 }, { "epoch": 0.21, "grad_norm": 1.4822292456205497, "learning_rate": 9.206980759514464e-06, "loss": 0.7473, "step": 3223 }, { "epoch": 0.21, "grad_norm": 1.5129752952363265, "learning_rate": 9.206420494957208e-06, "loss": 0.7684, "step": 3224 }, { "epoch": 0.21, "grad_norm": 1.447813231092412, "learning_rate": 9.205860049616837e-06, "loss": 0.6965, "step": 3225 }, { "epoch": 0.21, "grad_norm": 3.2561586409521004, "learning_rate": 9.20529942351744e-06, "loss": 0.7356, "step": 3226 }, { "epoch": 0.21, "grad_norm": 1.6970255748035403, "learning_rate": 9.20473861668311e-06, "loss": 0.708, "step": 3227 }, { "epoch": 0.21, "grad_norm": 1.5247958028326112, "learning_rate": 9.20417762913795e-06, "loss": 0.7594, "step": 3228 }, { "epoch": 0.21, "grad_norm": 1.6153557267576621, "learning_rate": 9.203616460906068e-06, "loss": 0.8022, "step": 3229 }, { "epoch": 0.21, "grad_norm": 1.431666898711257, "learning_rate": 9.203055112011585e-06, "loss": 0.7579, "step": 3230 }, { "epoch": 0.21, "grad_norm": 1.126688786618298, "learning_rate": 9.202493582478625e-06, "loss": 0.5987, "step": 3231 }, { "epoch": 0.21, "grad_norm": 1.4793206476428096, "learning_rate": 9.201931872331322e-06, "loss": 0.7587, "step": 3232 }, { "epoch": 0.21, "grad_norm": 1.7589460581470804, "learning_rate": 9.201369981593816e-06, "loss": 0.7579, "step": 3233 }, { "epoch": 0.21, "grad_norm": 1.5094540816710473, "learning_rate": 9.200807910290256e-06, "loss": 0.8201, "step": 3234 }, { "epoch": 0.21, "grad_norm": 1.5400178194132554, "learning_rate": 9.200245658444799e-06, "loss": 0.7542, "step": 3235 }, { "epoch": 0.21, "grad_norm": 1.4709370855253245, "learning_rate": 9.199683226081611e-06, "loss": 0.7099, "step": 3236 }, { "epoch": 0.21, "grad_norm": 1.5863175042449267, "learning_rate": 9.199120613224862e-06, "loss": 0.6662, "step": 3237 }, { "epoch": 0.21, "grad_norm": 1.6251046177692132, "learning_rate": 9.198557819898732e-06, "loss": 0.7738, "step": 3238 }, { "epoch": 0.21, "grad_norm": 1.4128165953599636, "learning_rate": 9.197994846127409e-06, "loss": 0.7401, "step": 3239 }, { "epoch": 0.21, "grad_norm": 1.5011204725514506, "learning_rate": 9.19743169193509e-06, "loss": 0.7532, "step": 3240 }, { "epoch": 0.21, "grad_norm": 1.1793091799268653, "learning_rate": 9.196868357345976e-06, "loss": 0.6381, "step": 3241 }, { "epoch": 0.21, "grad_norm": 1.3615669750403026, "learning_rate": 9.19630484238428e-06, "loss": 0.8158, "step": 3242 }, { "epoch": 0.21, "grad_norm": 1.560429709295658, "learning_rate": 9.195741147074218e-06, "loss": 0.8259, "step": 3243 }, { "epoch": 0.21, "grad_norm": 1.1512672884609698, "learning_rate": 9.195177271440019e-06, "loss": 0.5542, "step": 3244 }, { "epoch": 0.21, "grad_norm": 1.738444011041499, "learning_rate": 9.194613215505916e-06, "loss": 0.7935, "step": 3245 }, { "epoch": 0.21, "grad_norm": 1.6868526361250589, "learning_rate": 9.194048979296151e-06, "loss": 0.6896, "step": 3246 }, { "epoch": 0.21, "grad_norm": 1.5003674889275758, "learning_rate": 9.193484562834973e-06, "loss": 0.733, "step": 3247 }, { "epoch": 0.21, "grad_norm": 1.5898315803535212, "learning_rate": 9.19291996614664e-06, "loss": 0.6836, "step": 3248 }, { "epoch": 0.21, "grad_norm": 1.0288406226512519, "learning_rate": 9.192355189255418e-06, "loss": 0.6686, "step": 3249 }, { "epoch": 0.21, "grad_norm": 1.7989460589437027, "learning_rate": 9.19179023218558e-06, "loss": 0.7183, "step": 3250 }, { "epoch": 0.21, "grad_norm": 1.618533430795838, "learning_rate": 9.191225094961407e-06, "loss": 0.7715, "step": 3251 }, { "epoch": 0.21, "grad_norm": 1.5835014704885164, "learning_rate": 9.190659777607185e-06, "loss": 0.8265, "step": 3252 }, { "epoch": 0.21, "grad_norm": 1.6064974301156012, "learning_rate": 9.19009428014721e-06, "loss": 0.761, "step": 3253 }, { "epoch": 0.21, "grad_norm": 2.1034439559215, "learning_rate": 9.189528602605789e-06, "loss": 0.7356, "step": 3254 }, { "epoch": 0.21, "grad_norm": 1.5531876770323778, "learning_rate": 9.188962745007233e-06, "loss": 0.7227, "step": 3255 }, { "epoch": 0.21, "grad_norm": 1.1466962341523792, "learning_rate": 9.18839670737586e-06, "loss": 0.6158, "step": 3256 }, { "epoch": 0.21, "grad_norm": 1.574088534427165, "learning_rate": 9.187830489735996e-06, "loss": 0.7481, "step": 3257 }, { "epoch": 0.21, "grad_norm": 1.5109404313103227, "learning_rate": 9.187264092111978e-06, "loss": 0.7508, "step": 3258 }, { "epoch": 0.21, "grad_norm": 1.2635963451722854, "learning_rate": 9.18669751452815e-06, "loss": 0.6652, "step": 3259 }, { "epoch": 0.21, "grad_norm": 1.6850699786651264, "learning_rate": 9.186130757008858e-06, "loss": 0.7125, "step": 3260 }, { "epoch": 0.21, "grad_norm": 2.8284879625395454, "learning_rate": 9.185563819578462e-06, "loss": 0.7795, "step": 3261 }, { "epoch": 0.21, "grad_norm": 1.53920061812033, "learning_rate": 9.18499670226133e-06, "loss": 0.7449, "step": 3262 }, { "epoch": 0.21, "grad_norm": 1.6455039096709865, "learning_rate": 9.184429405081832e-06, "loss": 0.7512, "step": 3263 }, { "epoch": 0.21, "grad_norm": 1.4483230519668187, "learning_rate": 9.183861928064353e-06, "loss": 0.7315, "step": 3264 }, { "epoch": 0.21, "grad_norm": 1.4569835675159368, "learning_rate": 9.183294271233278e-06, "loss": 0.7298, "step": 3265 }, { "epoch": 0.21, "grad_norm": 1.6650790409486569, "learning_rate": 9.182726434613006e-06, "loss": 0.6608, "step": 3266 }, { "epoch": 0.21, "grad_norm": 1.562183636742118, "learning_rate": 9.18215841822794e-06, "loss": 0.79, "step": 3267 }, { "epoch": 0.21, "grad_norm": 1.7755399463409338, "learning_rate": 9.181590222102494e-06, "loss": 0.7886, "step": 3268 }, { "epoch": 0.21, "grad_norm": 1.879714764950092, "learning_rate": 9.181021846261088e-06, "loss": 0.7981, "step": 3269 }, { "epoch": 0.21, "grad_norm": 1.5836761923725535, "learning_rate": 9.180453290728146e-06, "loss": 0.7123, "step": 3270 }, { "epoch": 0.21, "grad_norm": 1.5366815047919469, "learning_rate": 9.17988455552811e-06, "loss": 0.7243, "step": 3271 }, { "epoch": 0.21, "grad_norm": 1.4687369253025695, "learning_rate": 9.179315640685416e-06, "loss": 0.7405, "step": 3272 }, { "epoch": 0.21, "grad_norm": 1.5196068017431699, "learning_rate": 9.178746546224517e-06, "loss": 0.7289, "step": 3273 }, { "epoch": 0.21, "grad_norm": 1.5992606289566882, "learning_rate": 9.178177272169874e-06, "loss": 0.6731, "step": 3274 }, { "epoch": 0.21, "grad_norm": 1.707749088538412, "learning_rate": 9.177607818545951e-06, "loss": 0.7214, "step": 3275 }, { "epoch": 0.21, "grad_norm": 1.6067797388987708, "learning_rate": 9.177038185377222e-06, "loss": 0.7087, "step": 3276 }, { "epoch": 0.21, "grad_norm": 1.4985691813290056, "learning_rate": 9.176468372688168e-06, "loss": 0.7297, "step": 3277 }, { "epoch": 0.21, "grad_norm": 1.4843823461054808, "learning_rate": 9.175898380503281e-06, "loss": 0.692, "step": 3278 }, { "epoch": 0.21, "grad_norm": 1.581425115583686, "learning_rate": 9.175328208847056e-06, "loss": 0.7318, "step": 3279 }, { "epoch": 0.21, "grad_norm": 2.775048311143906, "learning_rate": 9.174757857743997e-06, "loss": 0.795, "step": 3280 }, { "epoch": 0.21, "grad_norm": 2.902581013392771, "learning_rate": 9.17418732721862e-06, "loss": 0.7381, "step": 3281 }, { "epoch": 0.21, "grad_norm": 1.9687958648978792, "learning_rate": 9.173616617295442e-06, "loss": 0.7407, "step": 3282 }, { "epoch": 0.21, "grad_norm": 1.6377712305943695, "learning_rate": 9.17304572799899e-06, "loss": 0.7182, "step": 3283 }, { "epoch": 0.21, "grad_norm": 1.5896177702178278, "learning_rate": 9.172474659353803e-06, "loss": 0.6853, "step": 3284 }, { "epoch": 0.21, "grad_norm": 1.699933945820486, "learning_rate": 9.171903411384422e-06, "loss": 0.8241, "step": 3285 }, { "epoch": 0.21, "grad_norm": 2.0750637284407576, "learning_rate": 9.171331984115399e-06, "loss": 0.7112, "step": 3286 }, { "epoch": 0.21, "grad_norm": 1.7083518604025754, "learning_rate": 9.170760377571293e-06, "loss": 0.7742, "step": 3287 }, { "epoch": 0.21, "grad_norm": 1.599096082136968, "learning_rate": 9.17018859177667e-06, "loss": 0.6594, "step": 3288 }, { "epoch": 0.21, "grad_norm": 2.0107429477572065, "learning_rate": 9.169616626756103e-06, "loss": 0.7677, "step": 3289 }, { "epoch": 0.21, "grad_norm": 1.279770360013972, "learning_rate": 9.169044482534175e-06, "loss": 0.7497, "step": 3290 }, { "epoch": 0.21, "grad_norm": 1.7302919462620183, "learning_rate": 9.168472159135477e-06, "loss": 0.7448, "step": 3291 }, { "epoch": 0.21, "grad_norm": 1.464543095011277, "learning_rate": 9.167899656584602e-06, "loss": 0.7364, "step": 3292 }, { "epoch": 0.21, "grad_norm": 1.4327402848660975, "learning_rate": 9.167326974906161e-06, "loss": 0.6985, "step": 3293 }, { "epoch": 0.21, "grad_norm": 1.6158682582601631, "learning_rate": 9.166754114124761e-06, "loss": 0.7294, "step": 3294 }, { "epoch": 0.21, "grad_norm": 1.7200588297783759, "learning_rate": 9.166181074265027e-06, "loss": 0.7384, "step": 3295 }, { "epoch": 0.21, "grad_norm": 2.0580554690280874, "learning_rate": 9.165607855351583e-06, "loss": 0.7482, "step": 3296 }, { "epoch": 0.21, "grad_norm": 1.619171789114178, "learning_rate": 9.165034457409066e-06, "loss": 0.687, "step": 3297 }, { "epoch": 0.21, "grad_norm": 1.737265420608195, "learning_rate": 9.164460880462121e-06, "loss": 0.7355, "step": 3298 }, { "epoch": 0.21, "grad_norm": 1.6032181613060557, "learning_rate": 9.163887124535398e-06, "loss": 0.7204, "step": 3299 }, { "epoch": 0.21, "grad_norm": 1.5536906220466025, "learning_rate": 9.163313189653557e-06, "loss": 0.7285, "step": 3300 }, { "epoch": 0.21, "grad_norm": 1.564854376334502, "learning_rate": 9.162739075841263e-06, "loss": 0.7237, "step": 3301 }, { "epoch": 0.21, "grad_norm": 1.2528965100968519, "learning_rate": 9.162164783123188e-06, "loss": 0.5556, "step": 3302 }, { "epoch": 0.21, "grad_norm": 1.5428389259858049, "learning_rate": 9.16159031152402e-06, "loss": 0.7331, "step": 3303 }, { "epoch": 0.21, "grad_norm": 1.2241957833701536, "learning_rate": 9.161015661068443e-06, "loss": 0.6455, "step": 3304 }, { "epoch": 0.21, "grad_norm": 1.4745929576429144, "learning_rate": 9.160440831781157e-06, "loss": 0.7925, "step": 3305 }, { "epoch": 0.21, "grad_norm": 1.0794892363592918, "learning_rate": 9.159865823686867e-06, "loss": 0.5513, "step": 3306 }, { "epoch": 0.21, "grad_norm": 1.7078402416384575, "learning_rate": 9.159290636810284e-06, "loss": 0.7808, "step": 3307 }, { "epoch": 0.21, "grad_norm": 1.9145226328664697, "learning_rate": 9.158715271176131e-06, "loss": 0.7605, "step": 3308 }, { "epoch": 0.21, "grad_norm": 1.5945493899343575, "learning_rate": 9.158139726809132e-06, "loss": 0.6958, "step": 3309 }, { "epoch": 0.21, "grad_norm": 1.5600145726667278, "learning_rate": 9.157564003734025e-06, "loss": 0.6509, "step": 3310 }, { "epoch": 0.21, "grad_norm": 1.6119919867565569, "learning_rate": 9.156988101975555e-06, "loss": 0.7087, "step": 3311 }, { "epoch": 0.21, "grad_norm": 1.8490089288657483, "learning_rate": 9.156412021558471e-06, "loss": 0.7581, "step": 3312 }, { "epoch": 0.21, "grad_norm": 1.3529355183612282, "learning_rate": 9.155835762507532e-06, "loss": 0.8095, "step": 3313 }, { "epoch": 0.21, "grad_norm": 1.6821764240569834, "learning_rate": 9.155259324847504e-06, "loss": 0.7313, "step": 3314 }, { "epoch": 0.21, "grad_norm": 1.4397084823437947, "learning_rate": 9.154682708603162e-06, "loss": 0.6502, "step": 3315 }, { "epoch": 0.21, "grad_norm": 1.6109168052818024, "learning_rate": 9.154105913799289e-06, "loss": 0.7056, "step": 3316 }, { "epoch": 0.21, "grad_norm": 1.717579616902208, "learning_rate": 9.153528940460669e-06, "loss": 0.6895, "step": 3317 }, { "epoch": 0.21, "grad_norm": 1.6885577270314736, "learning_rate": 9.152951788612105e-06, "loss": 0.7427, "step": 3318 }, { "epoch": 0.21, "grad_norm": 1.5913884167150105, "learning_rate": 9.152374458278402e-06, "loss": 0.7786, "step": 3319 }, { "epoch": 0.21, "grad_norm": 1.5274841245974147, "learning_rate": 9.151796949484367e-06, "loss": 0.8261, "step": 3320 }, { "epoch": 0.21, "grad_norm": 1.530689901705497, "learning_rate": 9.151219262254824e-06, "loss": 0.7525, "step": 3321 }, { "epoch": 0.21, "grad_norm": 1.4573867439880122, "learning_rate": 9.150641396614601e-06, "loss": 0.7344, "step": 3322 }, { "epoch": 0.21, "grad_norm": 1.8681170310322535, "learning_rate": 9.150063352588531e-06, "loss": 0.7059, "step": 3323 }, { "epoch": 0.21, "grad_norm": 1.8087529573908545, "learning_rate": 9.14948513020146e-06, "loss": 0.6698, "step": 3324 }, { "epoch": 0.21, "grad_norm": 1.7142707627606781, "learning_rate": 9.148906729478236e-06, "loss": 0.8355, "step": 3325 }, { "epoch": 0.21, "grad_norm": 2.15019532008024, "learning_rate": 9.148328150443719e-06, "loss": 0.675, "step": 3326 }, { "epoch": 0.21, "grad_norm": 1.742086697223966, "learning_rate": 9.147749393122776e-06, "loss": 0.7809, "step": 3327 }, { "epoch": 0.21, "grad_norm": 1.6808916620560372, "learning_rate": 9.14717045754028e-06, "loss": 0.7661, "step": 3328 }, { "epoch": 0.21, "grad_norm": 1.6600958674199526, "learning_rate": 9.14659134372111e-06, "loss": 0.7514, "step": 3329 }, { "epoch": 0.21, "grad_norm": 1.5671587550755357, "learning_rate": 9.14601205169016e-06, "loss": 0.723, "step": 3330 }, { "epoch": 0.21, "grad_norm": 1.6276949069893072, "learning_rate": 9.145432581472323e-06, "loss": 0.7126, "step": 3331 }, { "epoch": 0.21, "grad_norm": 1.6298989036184979, "learning_rate": 9.144852933092503e-06, "loss": 0.7428, "step": 3332 }, { "epoch": 0.21, "grad_norm": 1.6637714767398226, "learning_rate": 9.144273106575614e-06, "loss": 0.7525, "step": 3333 }, { "epoch": 0.21, "grad_norm": 1.6298232396284795, "learning_rate": 9.143693101946576e-06, "loss": 0.8591, "step": 3334 }, { "epoch": 0.21, "grad_norm": 1.950396941856726, "learning_rate": 9.143112919230314e-06, "loss": 0.7054, "step": 3335 }, { "epoch": 0.21, "grad_norm": 1.8429462956098919, "learning_rate": 9.142532558451767e-06, "loss": 0.8089, "step": 3336 }, { "epoch": 0.21, "grad_norm": 1.1495859911021065, "learning_rate": 9.141952019635874e-06, "loss": 0.6627, "step": 3337 }, { "epoch": 0.21, "grad_norm": 1.6328861239293802, "learning_rate": 9.141371302807586e-06, "loss": 0.7245, "step": 3338 }, { "epoch": 0.21, "grad_norm": 1.7736253666614068, "learning_rate": 9.140790407991862e-06, "loss": 0.7639, "step": 3339 }, { "epoch": 0.21, "grad_norm": 1.7448795584339913, "learning_rate": 9.140209335213667e-06, "loss": 0.8014, "step": 3340 }, { "epoch": 0.21, "grad_norm": 1.3782069681336007, "learning_rate": 9.139628084497975e-06, "loss": 0.7015, "step": 3341 }, { "epoch": 0.21, "grad_norm": 1.5315031250329334, "learning_rate": 9.139046655869767e-06, "loss": 0.7532, "step": 3342 }, { "epoch": 0.21, "grad_norm": 1.5551505914831265, "learning_rate": 9.138465049354031e-06, "loss": 0.7185, "step": 3343 }, { "epoch": 0.21, "grad_norm": 1.8153724600717338, "learning_rate": 9.137883264975763e-06, "loss": 0.7635, "step": 3344 }, { "epoch": 0.21, "grad_norm": 1.34441291808209, "learning_rate": 9.137301302759968e-06, "loss": 0.6588, "step": 3345 }, { "epoch": 0.21, "grad_norm": 1.5661217368676186, "learning_rate": 9.136719162731655e-06, "loss": 0.8406, "step": 3346 }, { "epoch": 0.21, "grad_norm": 6.756649904863547, "learning_rate": 9.136136844915847e-06, "loss": 0.7833, "step": 3347 }, { "epoch": 0.21, "grad_norm": 1.5184634803707096, "learning_rate": 9.135554349337567e-06, "loss": 0.7332, "step": 3348 }, { "epoch": 0.21, "grad_norm": 1.8066182621470976, "learning_rate": 9.134971676021851e-06, "loss": 0.8106, "step": 3349 }, { "epoch": 0.21, "grad_norm": 1.596589808390807, "learning_rate": 9.13438882499374e-06, "loss": 0.7043, "step": 3350 }, { "epoch": 0.21, "grad_norm": 1.6346697710642162, "learning_rate": 9.133805796278288e-06, "loss": 0.7474, "step": 3351 }, { "epoch": 0.21, "grad_norm": 1.449603778938006, "learning_rate": 9.133222589900547e-06, "loss": 0.7431, "step": 3352 }, { "epoch": 0.21, "grad_norm": 1.467157289696385, "learning_rate": 9.132639205885584e-06, "loss": 0.7862, "step": 3353 }, { "epoch": 0.21, "grad_norm": 1.7125272481069695, "learning_rate": 9.132055644258473e-06, "loss": 0.7789, "step": 3354 }, { "epoch": 0.21, "grad_norm": 1.6218588228200606, "learning_rate": 9.131471905044294e-06, "loss": 0.6644, "step": 3355 }, { "epoch": 0.21, "grad_norm": 1.4242779116069937, "learning_rate": 9.130887988268131e-06, "loss": 0.7428, "step": 3356 }, { "epoch": 0.21, "grad_norm": 1.701414185236812, "learning_rate": 9.130303893955084e-06, "loss": 0.8685, "step": 3357 }, { "epoch": 0.21, "grad_norm": 1.677326563765616, "learning_rate": 9.129719622130255e-06, "loss": 0.6815, "step": 3358 }, { "epoch": 0.21, "grad_norm": 1.6890495034642519, "learning_rate": 9.129135172818754e-06, "loss": 0.7774, "step": 3359 }, { "epoch": 0.22, "grad_norm": 1.3699639425277794, "learning_rate": 9.1285505460457e-06, "loss": 0.6976, "step": 3360 }, { "epoch": 0.22, "grad_norm": 1.6155777965661442, "learning_rate": 9.127965741836218e-06, "loss": 0.7442, "step": 3361 }, { "epoch": 0.22, "grad_norm": 1.6496826680579144, "learning_rate": 9.127380760215443e-06, "loss": 0.6917, "step": 3362 }, { "epoch": 0.22, "grad_norm": 1.507682690779098, "learning_rate": 9.126795601208516e-06, "loss": 0.6819, "step": 3363 }, { "epoch": 0.22, "grad_norm": 4.662763016157346, "learning_rate": 9.126210264840585e-06, "loss": 0.7336, "step": 3364 }, { "epoch": 0.22, "grad_norm": 1.677935279453051, "learning_rate": 9.125624751136809e-06, "loss": 0.721, "step": 3365 }, { "epoch": 0.22, "grad_norm": 1.5304290587556522, "learning_rate": 9.125039060122348e-06, "loss": 0.692, "step": 3366 }, { "epoch": 0.22, "grad_norm": 1.5934556069984451, "learning_rate": 9.124453191822376e-06, "loss": 0.747, "step": 3367 }, { "epoch": 0.22, "grad_norm": 1.6213395940546838, "learning_rate": 9.123867146262074e-06, "loss": 0.86, "step": 3368 }, { "epoch": 0.22, "grad_norm": 1.5250699650424444, "learning_rate": 9.123280923466627e-06, "loss": 0.7583, "step": 3369 }, { "epoch": 0.22, "grad_norm": 1.5060699365808325, "learning_rate": 9.12269452346123e-06, "loss": 0.7281, "step": 3370 }, { "epoch": 0.22, "grad_norm": 1.4390927309895145, "learning_rate": 9.122107946271086e-06, "loss": 0.7979, "step": 3371 }, { "epoch": 0.22, "grad_norm": 1.7189370086793907, "learning_rate": 9.121521191921403e-06, "loss": 0.7218, "step": 3372 }, { "epoch": 0.22, "grad_norm": 1.4029647409446035, "learning_rate": 9.1209342604374e-06, "loss": 0.7017, "step": 3373 }, { "epoch": 0.22, "grad_norm": 1.096426657535435, "learning_rate": 9.120347151844301e-06, "loss": 0.6001, "step": 3374 }, { "epoch": 0.22, "grad_norm": 1.5239639100199682, "learning_rate": 9.119759866167342e-06, "loss": 0.7102, "step": 3375 }, { "epoch": 0.22, "grad_norm": 1.828245302265213, "learning_rate": 9.11917240343176e-06, "loss": 0.783, "step": 3376 }, { "epoch": 0.22, "grad_norm": 1.6548879914356762, "learning_rate": 9.118584763662803e-06, "loss": 0.7023, "step": 3377 }, { "epoch": 0.22, "grad_norm": 1.4375435168701791, "learning_rate": 9.117996946885727e-06, "loss": 0.6783, "step": 3378 }, { "epoch": 0.22, "grad_norm": 1.6966020544364326, "learning_rate": 9.117408953125794e-06, "loss": 0.7402, "step": 3379 }, { "epoch": 0.22, "grad_norm": 1.1100822409164366, "learning_rate": 9.116820782408279e-06, "loss": 0.6762, "step": 3380 }, { "epoch": 0.22, "grad_norm": 1.6697334237800532, "learning_rate": 9.116232434758456e-06, "loss": 0.7491, "step": 3381 }, { "epoch": 0.22, "grad_norm": 1.6650333012652294, "learning_rate": 9.115643910201612e-06, "loss": 0.8352, "step": 3382 }, { "epoch": 0.22, "grad_norm": 1.2827311344465488, "learning_rate": 9.115055208763042e-06, "loss": 0.7489, "step": 3383 }, { "epoch": 0.22, "grad_norm": 1.6398371869863828, "learning_rate": 9.114466330468045e-06, "loss": 0.7649, "step": 3384 }, { "epoch": 0.22, "grad_norm": 1.49439295779527, "learning_rate": 9.113877275341932e-06, "loss": 0.6555, "step": 3385 }, { "epoch": 0.22, "grad_norm": 1.2097911689092529, "learning_rate": 9.113288043410015e-06, "loss": 0.7231, "step": 3386 }, { "epoch": 0.22, "grad_norm": 1.7578356066010128, "learning_rate": 9.112698634697625e-06, "loss": 0.7385, "step": 3387 }, { "epoch": 0.22, "grad_norm": 1.6343993906862933, "learning_rate": 9.112109049230087e-06, "loss": 0.7101, "step": 3388 }, { "epoch": 0.22, "grad_norm": 2.560870925977393, "learning_rate": 9.111519287032745e-06, "loss": 0.7502, "step": 3389 }, { "epoch": 0.22, "grad_norm": 1.243643143972144, "learning_rate": 9.110929348130942e-06, "loss": 0.7626, "step": 3390 }, { "epoch": 0.22, "grad_norm": 1.5039043038771733, "learning_rate": 9.110339232550034e-06, "loss": 0.687, "step": 3391 }, { "epoch": 0.22, "grad_norm": 1.5575093286897377, "learning_rate": 9.109748940315383e-06, "loss": 0.7518, "step": 3392 }, { "epoch": 0.22, "grad_norm": 1.4890411060791324, "learning_rate": 9.109158471452358e-06, "loss": 0.7515, "step": 3393 }, { "epoch": 0.22, "grad_norm": 1.6363089609889456, "learning_rate": 9.108567825986336e-06, "loss": 0.8146, "step": 3394 }, { "epoch": 0.22, "grad_norm": 1.4809314031978478, "learning_rate": 9.107977003942703e-06, "loss": 0.7665, "step": 3395 }, { "epoch": 0.22, "grad_norm": 1.7575133645286445, "learning_rate": 9.107386005346852e-06, "loss": 0.7576, "step": 3396 }, { "epoch": 0.22, "grad_norm": 1.3472796250392778, "learning_rate": 9.106794830224179e-06, "loss": 0.7242, "step": 3397 }, { "epoch": 0.22, "grad_norm": 1.6499768578535694, "learning_rate": 9.106203478600094e-06, "loss": 0.6878, "step": 3398 }, { "epoch": 0.22, "grad_norm": 2.892135191562675, "learning_rate": 9.105611950500012e-06, "loss": 0.6662, "step": 3399 }, { "epoch": 0.22, "grad_norm": 1.2259794443683503, "learning_rate": 9.105020245949355e-06, "loss": 0.6357, "step": 3400 }, { "epoch": 0.22, "grad_norm": 2.0125470258034692, "learning_rate": 9.104428364973555e-06, "loss": 0.7704, "step": 3401 }, { "epoch": 0.22, "grad_norm": 1.6275675769732507, "learning_rate": 9.103836307598049e-06, "loss": 0.7035, "step": 3402 }, { "epoch": 0.22, "grad_norm": 1.712704038475072, "learning_rate": 9.10324407384828e-06, "loss": 0.7386, "step": 3403 }, { "epoch": 0.22, "grad_norm": 1.7346093310389141, "learning_rate": 9.102651663749703e-06, "loss": 0.7692, "step": 3404 }, { "epoch": 0.22, "grad_norm": 1.5164220026855615, "learning_rate": 9.102059077327779e-06, "loss": 0.7441, "step": 3405 }, { "epoch": 0.22, "grad_norm": 1.5226283444421644, "learning_rate": 9.101466314607974e-06, "loss": 0.6957, "step": 3406 }, { "epoch": 0.22, "grad_norm": 1.1321964333717198, "learning_rate": 9.100873375615767e-06, "loss": 0.7028, "step": 3407 }, { "epoch": 0.22, "grad_norm": 1.9049762813386015, "learning_rate": 9.100280260376639e-06, "loss": 0.6904, "step": 3408 }, { "epoch": 0.22, "grad_norm": 1.5776952025284066, "learning_rate": 9.099686968916081e-06, "loss": 0.7336, "step": 3409 }, { "epoch": 0.22, "grad_norm": 1.568520302400228, "learning_rate": 9.099093501259592e-06, "loss": 0.6848, "step": 3410 }, { "epoch": 0.22, "grad_norm": 1.3845837916477126, "learning_rate": 9.098499857432677e-06, "loss": 0.6002, "step": 3411 }, { "epoch": 0.22, "grad_norm": 1.7205937606014496, "learning_rate": 9.097906037460852e-06, "loss": 0.7114, "step": 3412 }, { "epoch": 0.22, "grad_norm": 1.7799316735407746, "learning_rate": 9.097312041369634e-06, "loss": 0.7555, "step": 3413 }, { "epoch": 0.22, "grad_norm": 1.8311955123970818, "learning_rate": 9.096717869184555e-06, "loss": 0.6699, "step": 3414 }, { "epoch": 0.22, "grad_norm": 1.6805356897117965, "learning_rate": 9.09612352093115e-06, "loss": 0.7023, "step": 3415 }, { "epoch": 0.22, "grad_norm": 1.5456627193559764, "learning_rate": 9.095528996634966e-06, "loss": 0.7214, "step": 3416 }, { "epoch": 0.22, "grad_norm": 1.5365910639191924, "learning_rate": 9.094934296321549e-06, "loss": 0.7315, "step": 3417 }, { "epoch": 0.22, "grad_norm": 1.71560942663501, "learning_rate": 9.094339420016461e-06, "loss": 0.7626, "step": 3418 }, { "epoch": 0.22, "grad_norm": 1.7260113285253154, "learning_rate": 9.093744367745267e-06, "loss": 0.8459, "step": 3419 }, { "epoch": 0.22, "grad_norm": 1.2514661802964702, "learning_rate": 9.093149139533543e-06, "loss": 0.6425, "step": 3420 }, { "epoch": 0.22, "grad_norm": 1.725383305391455, "learning_rate": 9.092553735406871e-06, "loss": 0.7363, "step": 3421 }, { "epoch": 0.22, "grad_norm": 1.5365732335813083, "learning_rate": 9.091958155390839e-06, "loss": 0.6936, "step": 3422 }, { "epoch": 0.22, "grad_norm": 2.0830662992627986, "learning_rate": 9.091362399511043e-06, "loss": 0.7144, "step": 3423 }, { "epoch": 0.22, "grad_norm": 1.579226096220448, "learning_rate": 9.090766467793088e-06, "loss": 0.6786, "step": 3424 }, { "epoch": 0.22, "grad_norm": 1.4844080607279395, "learning_rate": 9.090170360262587e-06, "loss": 0.6423, "step": 3425 }, { "epoch": 0.22, "grad_norm": 1.1423781799932513, "learning_rate": 9.089574076945158e-06, "loss": 0.7426, "step": 3426 }, { "epoch": 0.22, "grad_norm": 2.457183495007198, "learning_rate": 9.088977617866428e-06, "loss": 0.69, "step": 3427 }, { "epoch": 0.22, "grad_norm": 1.8129629389062023, "learning_rate": 9.088380983052033e-06, "loss": 0.7724, "step": 3428 }, { "epoch": 0.22, "grad_norm": 1.2100892389916942, "learning_rate": 9.087784172527614e-06, "loss": 0.6084, "step": 3429 }, { "epoch": 0.22, "grad_norm": 1.7381369412350316, "learning_rate": 9.087187186318821e-06, "loss": 0.7683, "step": 3430 }, { "epoch": 0.22, "grad_norm": 1.5560254061314427, "learning_rate": 9.086590024451312e-06, "loss": 0.8461, "step": 3431 }, { "epoch": 0.22, "grad_norm": 1.5698426425978391, "learning_rate": 9.08599268695075e-06, "loss": 0.7979, "step": 3432 }, { "epoch": 0.22, "grad_norm": 1.410363730480343, "learning_rate": 9.085395173842807e-06, "loss": 0.6929, "step": 3433 }, { "epoch": 0.22, "grad_norm": 1.5036114714720599, "learning_rate": 9.084797485153165e-06, "loss": 0.7913, "step": 3434 }, { "epoch": 0.22, "grad_norm": 1.101969409908887, "learning_rate": 9.08419962090751e-06, "loss": 0.6448, "step": 3435 }, { "epoch": 0.22, "grad_norm": 1.5412120588617158, "learning_rate": 9.083601581131538e-06, "loss": 0.7496, "step": 3436 }, { "epoch": 0.22, "grad_norm": 1.4881471023530035, "learning_rate": 9.08300336585095e-06, "loss": 0.6216, "step": 3437 }, { "epoch": 0.22, "grad_norm": 1.758100804420387, "learning_rate": 9.08240497509146e-06, "loss": 0.7127, "step": 3438 }, { "epoch": 0.22, "grad_norm": 1.6213980064104216, "learning_rate": 9.081806408878778e-06, "loss": 0.8455, "step": 3439 }, { "epoch": 0.22, "grad_norm": 1.645292512280977, "learning_rate": 9.081207667238637e-06, "loss": 0.7755, "step": 3440 }, { "epoch": 0.22, "grad_norm": 1.7075071100015546, "learning_rate": 9.080608750196764e-06, "loss": 0.7768, "step": 3441 }, { "epoch": 0.22, "grad_norm": 1.467446294011381, "learning_rate": 9.080009657778903e-06, "loss": 0.6775, "step": 3442 }, { "epoch": 0.22, "grad_norm": 1.5481435407209532, "learning_rate": 9.079410390010798e-06, "loss": 0.7155, "step": 3443 }, { "epoch": 0.22, "grad_norm": 1.5662258568870828, "learning_rate": 9.078810946918209e-06, "loss": 0.6256, "step": 3444 }, { "epoch": 0.22, "grad_norm": 1.602430304639332, "learning_rate": 9.078211328526896e-06, "loss": 0.8375, "step": 3445 }, { "epoch": 0.22, "grad_norm": 2.3280773130599637, "learning_rate": 9.077611534862628e-06, "loss": 0.7681, "step": 3446 }, { "epoch": 0.22, "grad_norm": 1.803856354455298, "learning_rate": 9.077011565951186e-06, "loss": 0.7651, "step": 3447 }, { "epoch": 0.22, "grad_norm": 1.7237536142534626, "learning_rate": 9.076411421818354e-06, "loss": 0.6939, "step": 3448 }, { "epoch": 0.22, "grad_norm": 1.65233595766242, "learning_rate": 9.075811102489923e-06, "loss": 0.7564, "step": 3449 }, { "epoch": 0.22, "grad_norm": 1.5735257449778195, "learning_rate": 9.075210607991696e-06, "loss": 0.705, "step": 3450 }, { "epoch": 0.22, "grad_norm": 1.5699789665711839, "learning_rate": 9.074609938349481e-06, "loss": 0.7371, "step": 3451 }, { "epoch": 0.22, "grad_norm": 1.5882267832448462, "learning_rate": 9.074009093589093e-06, "loss": 0.7731, "step": 3452 }, { "epoch": 0.22, "grad_norm": 1.252288077995345, "learning_rate": 9.073408073736355e-06, "loss": 0.7085, "step": 3453 }, { "epoch": 0.22, "grad_norm": 1.6739255001603817, "learning_rate": 9.072806878817095e-06, "loss": 0.7033, "step": 3454 }, { "epoch": 0.22, "grad_norm": 1.4715123055046202, "learning_rate": 9.072205508857155e-06, "loss": 0.8036, "step": 3455 }, { "epoch": 0.22, "grad_norm": 1.1579307153283134, "learning_rate": 9.07160396388238e-06, "loss": 0.6683, "step": 3456 }, { "epoch": 0.22, "grad_norm": 1.6761435248962129, "learning_rate": 9.071002243918621e-06, "loss": 0.7371, "step": 3457 }, { "epoch": 0.22, "grad_norm": 1.4324750332545928, "learning_rate": 9.070400348991742e-06, "loss": 0.6465, "step": 3458 }, { "epoch": 0.22, "grad_norm": 1.4546711737887301, "learning_rate": 9.069798279127606e-06, "loss": 0.7578, "step": 3459 }, { "epoch": 0.22, "grad_norm": 1.4290326681540024, "learning_rate": 9.069196034352094e-06, "loss": 0.7501, "step": 3460 }, { "epoch": 0.22, "grad_norm": 1.5015280822161963, "learning_rate": 9.068593614691086e-06, "loss": 0.6594, "step": 3461 }, { "epoch": 0.22, "grad_norm": 1.1551668151473433, "learning_rate": 9.067991020170474e-06, "loss": 0.6581, "step": 3462 }, { "epoch": 0.22, "grad_norm": 1.4774347000219528, "learning_rate": 9.067388250816155e-06, "loss": 0.6595, "step": 3463 }, { "epoch": 0.22, "grad_norm": 1.3978120147422914, "learning_rate": 9.066785306654038e-06, "loss": 0.7153, "step": 3464 }, { "epoch": 0.22, "grad_norm": 1.6096609716532508, "learning_rate": 9.066182187710032e-06, "loss": 0.7089, "step": 3465 }, { "epoch": 0.22, "grad_norm": 1.5340943898973207, "learning_rate": 9.06557889401006e-06, "loss": 0.7987, "step": 3466 }, { "epoch": 0.22, "grad_norm": 1.892888678471218, "learning_rate": 9.064975425580051e-06, "loss": 0.6786, "step": 3467 }, { "epoch": 0.22, "grad_norm": 1.5655525949037332, "learning_rate": 9.06437178244594e-06, "loss": 0.7107, "step": 3468 }, { "epoch": 0.22, "grad_norm": 2.056441005834817, "learning_rate": 9.06376796463367e-06, "loss": 0.7818, "step": 3469 }, { "epoch": 0.22, "grad_norm": 1.197009069760237, "learning_rate": 9.06316397216919e-06, "loss": 0.6784, "step": 3470 }, { "epoch": 0.22, "grad_norm": 1.455653671245112, "learning_rate": 9.062559805078463e-06, "loss": 0.7071, "step": 3471 }, { "epoch": 0.22, "grad_norm": 1.6539773729328866, "learning_rate": 9.061955463387454e-06, "loss": 0.737, "step": 3472 }, { "epoch": 0.22, "grad_norm": 1.2472789468741499, "learning_rate": 9.061350947122129e-06, "loss": 0.7255, "step": 3473 }, { "epoch": 0.22, "grad_norm": 1.481732197955395, "learning_rate": 9.060746256308479e-06, "loss": 0.6939, "step": 3474 }, { "epoch": 0.22, "grad_norm": 2.767031179125294, "learning_rate": 9.060141390972486e-06, "loss": 0.7621, "step": 3475 }, { "epoch": 0.22, "grad_norm": 1.4968119136791396, "learning_rate": 9.059536351140146e-06, "loss": 0.647, "step": 3476 }, { "epoch": 0.22, "grad_norm": 1.522157697409876, "learning_rate": 9.058931136837465e-06, "loss": 0.6484, "step": 3477 }, { "epoch": 0.22, "grad_norm": 1.6082685424488843, "learning_rate": 9.058325748090454e-06, "loss": 0.7597, "step": 3478 }, { "epoch": 0.22, "grad_norm": 1.610885761194085, "learning_rate": 9.05772018492513e-06, "loss": 0.7497, "step": 3479 }, { "epoch": 0.22, "grad_norm": 1.738027411436138, "learning_rate": 9.057114447367517e-06, "loss": 0.7613, "step": 3480 }, { "epoch": 0.22, "grad_norm": 1.5881111166020545, "learning_rate": 9.056508535443652e-06, "loss": 0.7189, "step": 3481 }, { "epoch": 0.22, "grad_norm": 1.6323638015114936, "learning_rate": 9.055902449179573e-06, "loss": 0.7504, "step": 3482 }, { "epoch": 0.22, "grad_norm": 1.574908367426221, "learning_rate": 9.05529618860133e-06, "loss": 0.671, "step": 3483 }, { "epoch": 0.22, "grad_norm": 1.1473084152101665, "learning_rate": 9.054689753734978e-06, "loss": 0.7932, "step": 3484 }, { "epoch": 0.22, "grad_norm": 1.57176284806344, "learning_rate": 9.05408314460658e-06, "loss": 0.6793, "step": 3485 }, { "epoch": 0.22, "grad_norm": 1.5131909157446017, "learning_rate": 9.053476361242208e-06, "loss": 0.7109, "step": 3486 }, { "epoch": 0.22, "grad_norm": 1.7070617131102035, "learning_rate": 9.052869403667938e-06, "loss": 0.6109, "step": 3487 }, { "epoch": 0.22, "grad_norm": 2.0673373927377745, "learning_rate": 9.052262271909859e-06, "loss": 0.7313, "step": 3488 }, { "epoch": 0.22, "grad_norm": 1.5246416394834676, "learning_rate": 9.051654965994062e-06, "loss": 0.7757, "step": 3489 }, { "epoch": 0.22, "grad_norm": 1.5727634032424536, "learning_rate": 9.051047485946648e-06, "loss": 0.7182, "step": 3490 }, { "epoch": 0.22, "grad_norm": 1.7873339095399765, "learning_rate": 9.050439831793726e-06, "loss": 0.8153, "step": 3491 }, { "epoch": 0.22, "grad_norm": 1.6250757905705049, "learning_rate": 9.04983200356141e-06, "loss": 0.6956, "step": 3492 }, { "epoch": 0.22, "grad_norm": 1.8888459770367976, "learning_rate": 9.049224001275825e-06, "loss": 0.7734, "step": 3493 }, { "epoch": 0.22, "grad_norm": 1.3843769929355036, "learning_rate": 9.048615824963102e-06, "loss": 0.6665, "step": 3494 }, { "epoch": 0.22, "grad_norm": 1.5352689984601222, "learning_rate": 9.048007474649377e-06, "loss": 0.7175, "step": 3495 }, { "epoch": 0.22, "grad_norm": 5.626041030909264, "learning_rate": 9.047398950360798e-06, "loss": 0.6986, "step": 3496 }, { "epoch": 0.22, "grad_norm": 1.5425416926361906, "learning_rate": 9.046790252123514e-06, "loss": 0.7153, "step": 3497 }, { "epoch": 0.22, "grad_norm": 1.3456552035509945, "learning_rate": 9.046181379963692e-06, "loss": 0.6467, "step": 3498 }, { "epoch": 0.22, "grad_norm": 1.4784807805296547, "learning_rate": 9.045572333907495e-06, "loss": 0.6824, "step": 3499 }, { "epoch": 0.22, "grad_norm": 1.5071270216514163, "learning_rate": 9.0449631139811e-06, "loss": 0.7081, "step": 3500 }, { "epoch": 0.22, "grad_norm": 1.500934424568577, "learning_rate": 9.04435372021069e-06, "loss": 0.7226, "step": 3501 }, { "epoch": 0.22, "grad_norm": 1.4185184123045151, "learning_rate": 9.043744152622455e-06, "loss": 0.7028, "step": 3502 }, { "epoch": 0.22, "grad_norm": 2.0068815768347306, "learning_rate": 9.043134411242593e-06, "loss": 0.7882, "step": 3503 }, { "epoch": 0.22, "grad_norm": 1.648503906606241, "learning_rate": 9.042524496097312e-06, "loss": 0.8204, "step": 3504 }, { "epoch": 0.22, "grad_norm": 1.256571143209176, "learning_rate": 9.041914407212821e-06, "loss": 0.7097, "step": 3505 }, { "epoch": 0.22, "grad_norm": 1.6566065480875178, "learning_rate": 9.041304144615341e-06, "loss": 0.7559, "step": 3506 }, { "epoch": 0.22, "grad_norm": 1.7060302135280703, "learning_rate": 9.040693708331103e-06, "loss": 0.7459, "step": 3507 }, { "epoch": 0.22, "grad_norm": 2.0111728282550687, "learning_rate": 9.040083098386338e-06, "loss": 0.7902, "step": 3508 }, { "epoch": 0.22, "grad_norm": 1.5403943876779633, "learning_rate": 9.039472314807293e-06, "loss": 0.6956, "step": 3509 }, { "epoch": 0.22, "grad_norm": 2.788036823444021, "learning_rate": 9.038861357620215e-06, "loss": 0.7942, "step": 3510 }, { "epoch": 0.22, "grad_norm": 1.5919427393583214, "learning_rate": 9.038250226851362e-06, "loss": 0.7432, "step": 3511 }, { "epoch": 0.22, "grad_norm": 1.559466527003446, "learning_rate": 9.037638922527002e-06, "loss": 0.7414, "step": 3512 }, { "epoch": 0.22, "grad_norm": 1.7215534406439794, "learning_rate": 9.037027444673402e-06, "loss": 0.6906, "step": 3513 }, { "epoch": 0.22, "grad_norm": 1.5303860646395588, "learning_rate": 9.036415793316848e-06, "loss": 0.7035, "step": 3514 }, { "epoch": 0.22, "grad_norm": 1.3355209881306063, "learning_rate": 9.035803968483625e-06, "loss": 0.746, "step": 3515 }, { "epoch": 0.23, "grad_norm": 1.64175781928047, "learning_rate": 9.035191970200025e-06, "loss": 0.8372, "step": 3516 }, { "epoch": 0.23, "grad_norm": 1.769667880875157, "learning_rate": 9.034579798492356e-06, "loss": 0.7403, "step": 3517 }, { "epoch": 0.23, "grad_norm": 1.1692924019238742, "learning_rate": 9.033967453386924e-06, "loss": 0.6479, "step": 3518 }, { "epoch": 0.23, "grad_norm": 1.6644665770360743, "learning_rate": 9.033354934910049e-06, "loss": 0.7567, "step": 3519 }, { "epoch": 0.23, "grad_norm": 1.9604338527497633, "learning_rate": 9.032742243088053e-06, "loss": 0.684, "step": 3520 }, { "epoch": 0.23, "grad_norm": 1.675335279603412, "learning_rate": 9.032129377947267e-06, "loss": 0.7759, "step": 3521 }, { "epoch": 0.23, "grad_norm": 1.4988944456559357, "learning_rate": 9.031516339514036e-06, "loss": 0.7387, "step": 3522 }, { "epoch": 0.23, "grad_norm": 1.5075100829767907, "learning_rate": 9.030903127814704e-06, "loss": 0.6979, "step": 3523 }, { "epoch": 0.23, "grad_norm": 1.6792716145566888, "learning_rate": 9.030289742875624e-06, "loss": 0.6913, "step": 3524 }, { "epoch": 0.23, "grad_norm": 1.4567096391819256, "learning_rate": 9.029676184723161e-06, "loss": 0.713, "step": 3525 }, { "epoch": 0.23, "grad_norm": 1.6196494618444857, "learning_rate": 9.029062453383683e-06, "loss": 0.717, "step": 3526 }, { "epoch": 0.23, "grad_norm": 1.4899270894160088, "learning_rate": 9.028448548883566e-06, "loss": 0.7903, "step": 3527 }, { "epoch": 0.23, "grad_norm": 2.083413436693024, "learning_rate": 9.027834471249196e-06, "loss": 0.7204, "step": 3528 }, { "epoch": 0.23, "grad_norm": 1.4792259271068053, "learning_rate": 9.027220220506964e-06, "loss": 0.734, "step": 3529 }, { "epoch": 0.23, "grad_norm": 1.537620170277927, "learning_rate": 9.02660579668327e-06, "loss": 0.8262, "step": 3530 }, { "epoch": 0.23, "grad_norm": 1.526967867818837, "learning_rate": 9.025991199804518e-06, "loss": 0.7206, "step": 3531 }, { "epoch": 0.23, "grad_norm": 1.422602902212103, "learning_rate": 9.025376429897126e-06, "loss": 0.7052, "step": 3532 }, { "epoch": 0.23, "grad_norm": 1.5448824986577487, "learning_rate": 9.024761486987512e-06, "loss": 0.6862, "step": 3533 }, { "epoch": 0.23, "grad_norm": 1.5938444246969967, "learning_rate": 9.024146371102107e-06, "loss": 0.7602, "step": 3534 }, { "epoch": 0.23, "grad_norm": 1.4815260496848395, "learning_rate": 9.023531082267347e-06, "loss": 0.6848, "step": 3535 }, { "epoch": 0.23, "grad_norm": 1.4646013244710694, "learning_rate": 9.022915620509677e-06, "loss": 0.771, "step": 3536 }, { "epoch": 0.23, "grad_norm": 1.6959601844827534, "learning_rate": 9.022299985855544e-06, "loss": 0.7357, "step": 3537 }, { "epoch": 0.23, "grad_norm": 1.6171219150382155, "learning_rate": 9.021684178331413e-06, "loss": 0.6831, "step": 3538 }, { "epoch": 0.23, "grad_norm": 1.5939320142452653, "learning_rate": 9.021068197963744e-06, "loss": 0.7045, "step": 3539 }, { "epoch": 0.23, "grad_norm": 1.625950946588217, "learning_rate": 9.020452044779015e-06, "loss": 0.7631, "step": 3540 }, { "epoch": 0.23, "grad_norm": 1.5271337515088526, "learning_rate": 9.019835718803704e-06, "loss": 0.7468, "step": 3541 }, { "epoch": 0.23, "grad_norm": 1.9536125862137166, "learning_rate": 9.019219220064301e-06, "loss": 0.7438, "step": 3542 }, { "epoch": 0.23, "grad_norm": 1.5661383757042853, "learning_rate": 9.018602548587303e-06, "loss": 0.8564, "step": 3543 }, { "epoch": 0.23, "grad_norm": 1.5394197792891993, "learning_rate": 9.01798570439921e-06, "loss": 0.7237, "step": 3544 }, { "epoch": 0.23, "grad_norm": 1.169835331290269, "learning_rate": 9.017368687526535e-06, "loss": 0.7257, "step": 3545 }, { "epoch": 0.23, "grad_norm": 1.4823365272221447, "learning_rate": 9.016751497995795e-06, "loss": 0.6965, "step": 3546 }, { "epoch": 0.23, "grad_norm": 1.509343012876004, "learning_rate": 9.016134135833517e-06, "loss": 0.7443, "step": 3547 }, { "epoch": 0.23, "grad_norm": 1.6727098825330586, "learning_rate": 9.015516601066232e-06, "loss": 0.7088, "step": 3548 }, { "epoch": 0.23, "grad_norm": 1.428918315656599, "learning_rate": 9.01489889372048e-06, "loss": 0.6775, "step": 3549 }, { "epoch": 0.23, "grad_norm": 1.6999249425902583, "learning_rate": 9.014281013822813e-06, "loss": 0.7206, "step": 3550 }, { "epoch": 0.23, "grad_norm": 1.4931054363906489, "learning_rate": 9.01366296139978e-06, "loss": 0.7238, "step": 3551 }, { "epoch": 0.23, "grad_norm": 1.4915876272773225, "learning_rate": 9.01304473647795e-06, "loss": 0.7622, "step": 3552 }, { "epoch": 0.23, "grad_norm": 1.169419019562039, "learning_rate": 9.012426339083887e-06, "loss": 0.7467, "step": 3553 }, { "epoch": 0.23, "grad_norm": 1.5451818779374473, "learning_rate": 9.011807769244174e-06, "loss": 0.6521, "step": 3554 }, { "epoch": 0.23, "grad_norm": 1.2762296264643163, "learning_rate": 9.01118902698539e-06, "loss": 0.7336, "step": 3555 }, { "epoch": 0.23, "grad_norm": 1.603536433871876, "learning_rate": 9.010570112334132e-06, "loss": 0.7633, "step": 3556 }, { "epoch": 0.23, "grad_norm": 1.5166784696218427, "learning_rate": 9.009951025316998e-06, "loss": 0.734, "step": 3557 }, { "epoch": 0.23, "grad_norm": 2.112928100389548, "learning_rate": 9.009331765960593e-06, "loss": 0.7919, "step": 3558 }, { "epoch": 0.23, "grad_norm": 1.1536796725743002, "learning_rate": 9.008712334291536e-06, "loss": 0.69, "step": 3559 }, { "epoch": 0.23, "grad_norm": 1.4036422338831462, "learning_rate": 9.008092730336446e-06, "loss": 0.6752, "step": 3560 }, { "epoch": 0.23, "grad_norm": 1.831145537158797, "learning_rate": 9.007472954121952e-06, "loss": 0.7823, "step": 3561 }, { "epoch": 0.23, "grad_norm": 1.742251027071758, "learning_rate": 9.006853005674692e-06, "loss": 0.7629, "step": 3562 }, { "epoch": 0.23, "grad_norm": 1.195317289860784, "learning_rate": 9.006232885021309e-06, "loss": 0.7662, "step": 3563 }, { "epoch": 0.23, "grad_norm": 1.606730970390377, "learning_rate": 9.005612592188454e-06, "loss": 0.8126, "step": 3564 }, { "epoch": 0.23, "grad_norm": 1.5160044473146677, "learning_rate": 9.004992127202787e-06, "loss": 0.6567, "step": 3565 }, { "epoch": 0.23, "grad_norm": 1.6308959112582129, "learning_rate": 9.004371490090975e-06, "loss": 0.7642, "step": 3566 }, { "epoch": 0.23, "grad_norm": 1.4740592604641396, "learning_rate": 9.00375068087969e-06, "loss": 0.7309, "step": 3567 }, { "epoch": 0.23, "grad_norm": 1.5606301767431625, "learning_rate": 9.003129699595614e-06, "loss": 0.7563, "step": 3568 }, { "epoch": 0.23, "grad_norm": 1.14659163884293, "learning_rate": 9.002508546265433e-06, "loss": 0.6114, "step": 3569 }, { "epoch": 0.23, "grad_norm": 1.574655268547793, "learning_rate": 9.001887220915848e-06, "loss": 0.7176, "step": 3570 }, { "epoch": 0.23, "grad_norm": 1.4484349290420317, "learning_rate": 9.001265723573559e-06, "loss": 0.6072, "step": 3571 }, { "epoch": 0.23, "grad_norm": 1.4702300612070494, "learning_rate": 9.000644054265278e-06, "loss": 0.8277, "step": 3572 }, { "epoch": 0.23, "grad_norm": 1.3340757826838334, "learning_rate": 9.00002221301772e-06, "loss": 0.7219, "step": 3573 }, { "epoch": 0.23, "grad_norm": 1.7822750865065486, "learning_rate": 8.999400199857613e-06, "loss": 0.6497, "step": 3574 }, { "epoch": 0.23, "grad_norm": 1.595926997844005, "learning_rate": 8.998778014811688e-06, "loss": 0.728, "step": 3575 }, { "epoch": 0.23, "grad_norm": 1.5484111919257513, "learning_rate": 8.998155657906689e-06, "loss": 0.6677, "step": 3576 }, { "epoch": 0.23, "grad_norm": 1.5777155423235605, "learning_rate": 8.99753312916936e-06, "loss": 0.7672, "step": 3577 }, { "epoch": 0.23, "grad_norm": 1.6110035883361022, "learning_rate": 8.996910428626458e-06, "loss": 0.7401, "step": 3578 }, { "epoch": 0.23, "grad_norm": 1.549304173727485, "learning_rate": 8.996287556304743e-06, "loss": 0.7483, "step": 3579 }, { "epoch": 0.23, "grad_norm": 1.642962108009144, "learning_rate": 8.995664512230987e-06, "loss": 0.7209, "step": 3580 }, { "epoch": 0.23, "grad_norm": 1.1474055499889244, "learning_rate": 8.995041296431965e-06, "loss": 0.6903, "step": 3581 }, { "epoch": 0.23, "grad_norm": 1.6897085312387836, "learning_rate": 8.994417908934465e-06, "loss": 0.75, "step": 3582 }, { "epoch": 0.23, "grad_norm": 1.1503258041076712, "learning_rate": 8.993794349765275e-06, "loss": 0.597, "step": 3583 }, { "epoch": 0.23, "grad_norm": 1.6595512278441158, "learning_rate": 8.993170618951196e-06, "loss": 0.7393, "step": 3584 }, { "epoch": 0.23, "grad_norm": 2.3454797334668904, "learning_rate": 8.992546716519034e-06, "loss": 0.7063, "step": 3585 }, { "epoch": 0.23, "grad_norm": 1.7128089364456676, "learning_rate": 8.991922642495607e-06, "loss": 0.8497, "step": 3586 }, { "epoch": 0.23, "grad_norm": 1.3785440604091788, "learning_rate": 8.99129839690773e-06, "loss": 0.6761, "step": 3587 }, { "epoch": 0.23, "grad_norm": 1.7128657088752093, "learning_rate": 8.990673979782236e-06, "loss": 0.7482, "step": 3588 }, { "epoch": 0.23, "grad_norm": 1.4929384307350098, "learning_rate": 8.99004939114596e-06, "loss": 0.7495, "step": 3589 }, { "epoch": 0.23, "grad_norm": 1.808537865217127, "learning_rate": 8.989424631025746e-06, "loss": 0.6707, "step": 3590 }, { "epoch": 0.23, "grad_norm": 1.5650543647936115, "learning_rate": 8.988799699448442e-06, "loss": 0.7736, "step": 3591 }, { "epoch": 0.23, "grad_norm": 1.6432742379309644, "learning_rate": 8.98817459644091e-06, "loss": 0.6539, "step": 3592 }, { "epoch": 0.23, "grad_norm": 1.4726303008765442, "learning_rate": 8.987549322030013e-06, "loss": 0.6914, "step": 3593 }, { "epoch": 0.23, "grad_norm": 1.516209286834535, "learning_rate": 8.986923876242626e-06, "loss": 0.6674, "step": 3594 }, { "epoch": 0.23, "grad_norm": 1.6738896075409064, "learning_rate": 8.986298259105629e-06, "loss": 0.751, "step": 3595 }, { "epoch": 0.23, "grad_norm": 1.5592139018760924, "learning_rate": 8.985672470645908e-06, "loss": 0.7694, "step": 3596 }, { "epoch": 0.23, "grad_norm": 1.581501961684235, "learning_rate": 8.98504651089036e-06, "loss": 0.7086, "step": 3597 }, { "epoch": 0.23, "grad_norm": 1.4493862077448232, "learning_rate": 8.984420379865887e-06, "loss": 0.6876, "step": 3598 }, { "epoch": 0.23, "grad_norm": 1.0307107493372272, "learning_rate": 8.983794077599398e-06, "loss": 0.7037, "step": 3599 }, { "epoch": 0.23, "grad_norm": 1.4956960716654026, "learning_rate": 8.98316760411781e-06, "loss": 0.6906, "step": 3600 }, { "epoch": 0.23, "grad_norm": 1.7686243964701849, "learning_rate": 8.98254095944805e-06, "loss": 0.7453, "step": 3601 }, { "epoch": 0.23, "grad_norm": 1.7336069164778605, "learning_rate": 8.981914143617048e-06, "loss": 0.8076, "step": 3602 }, { "epoch": 0.23, "grad_norm": 1.6770210865316968, "learning_rate": 8.981287156651741e-06, "loss": 0.815, "step": 3603 }, { "epoch": 0.23, "grad_norm": 1.655577450327629, "learning_rate": 8.98065999857908e-06, "loss": 0.7785, "step": 3604 }, { "epoch": 0.23, "grad_norm": 1.4592983970442077, "learning_rate": 8.980032669426015e-06, "loss": 0.7042, "step": 3605 }, { "epoch": 0.23, "grad_norm": 1.4860727545118853, "learning_rate": 8.97940516921951e-06, "loss": 0.6926, "step": 3606 }, { "epoch": 0.23, "grad_norm": 1.6465823653511786, "learning_rate": 8.978777497986533e-06, "loss": 0.7496, "step": 3607 }, { "epoch": 0.23, "grad_norm": 1.369633074945675, "learning_rate": 8.97814965575406e-06, "loss": 0.707, "step": 3608 }, { "epoch": 0.23, "grad_norm": 1.798234756040544, "learning_rate": 8.977521642549073e-06, "loss": 0.8591, "step": 3609 }, { "epoch": 0.23, "grad_norm": 1.8349123085309582, "learning_rate": 8.976893458398564e-06, "loss": 0.7904, "step": 3610 }, { "epoch": 0.23, "grad_norm": 1.7750368023676142, "learning_rate": 8.97626510332953e-06, "loss": 0.7272, "step": 3611 }, { "epoch": 0.23, "grad_norm": 1.6079804174579213, "learning_rate": 8.97563657736898e-06, "loss": 0.7253, "step": 3612 }, { "epoch": 0.23, "grad_norm": 1.4078376535233885, "learning_rate": 8.97500788054392e-06, "loss": 0.7175, "step": 3613 }, { "epoch": 0.23, "grad_norm": 1.4945512976890676, "learning_rate": 8.974379012881376e-06, "loss": 0.7641, "step": 3614 }, { "epoch": 0.23, "grad_norm": 1.3562484029416548, "learning_rate": 8.973749974408375e-06, "loss": 0.7215, "step": 3615 }, { "epoch": 0.23, "grad_norm": 1.7293668637955941, "learning_rate": 8.973120765151948e-06, "loss": 0.6957, "step": 3616 }, { "epoch": 0.23, "grad_norm": 0.9742851239821987, "learning_rate": 8.972491385139138e-06, "loss": 0.6524, "step": 3617 }, { "epoch": 0.23, "grad_norm": 1.4528968594398435, "learning_rate": 8.971861834396997e-06, "loss": 0.742, "step": 3618 }, { "epoch": 0.23, "grad_norm": 1.8268601694434947, "learning_rate": 8.971232112952581e-06, "loss": 0.7852, "step": 3619 }, { "epoch": 0.23, "grad_norm": 2.043242376300739, "learning_rate": 8.970602220832954e-06, "loss": 0.7993, "step": 3620 }, { "epoch": 0.23, "grad_norm": 1.7894208524653095, "learning_rate": 8.969972158065185e-06, "loss": 0.7376, "step": 3621 }, { "epoch": 0.23, "grad_norm": 2.0050687795783926, "learning_rate": 8.969341924676356e-06, "loss": 0.7712, "step": 3622 }, { "epoch": 0.23, "grad_norm": 1.4684234643429406, "learning_rate": 8.968711520693551e-06, "loss": 0.6638, "step": 3623 }, { "epoch": 0.23, "grad_norm": 1.5796785297466363, "learning_rate": 8.968080946143864e-06, "loss": 0.6797, "step": 3624 }, { "epoch": 0.23, "grad_norm": 1.4694417388679257, "learning_rate": 8.967450201054397e-06, "loss": 0.7132, "step": 3625 }, { "epoch": 0.23, "grad_norm": 1.4740953721838619, "learning_rate": 8.966819285452257e-06, "loss": 0.7669, "step": 3626 }, { "epoch": 0.23, "grad_norm": 1.5919664338494464, "learning_rate": 8.96618819936456e-06, "loss": 0.8067, "step": 3627 }, { "epoch": 0.23, "grad_norm": 1.7271655141482964, "learning_rate": 8.965556942818427e-06, "loss": 0.7475, "step": 3628 }, { "epoch": 0.23, "grad_norm": 1.5493187570548437, "learning_rate": 8.964925515840991e-06, "loss": 0.6992, "step": 3629 }, { "epoch": 0.23, "grad_norm": 1.404766400310345, "learning_rate": 8.964293918459388e-06, "loss": 0.656, "step": 3630 }, { "epoch": 0.23, "grad_norm": 1.6505529601266122, "learning_rate": 8.963662150700761e-06, "loss": 0.8225, "step": 3631 }, { "epoch": 0.23, "grad_norm": 1.0515372531730662, "learning_rate": 8.963030212592264e-06, "loss": 0.7064, "step": 3632 }, { "epoch": 0.23, "grad_norm": 1.5229301189079172, "learning_rate": 8.962398104161055e-06, "loss": 0.6992, "step": 3633 }, { "epoch": 0.23, "grad_norm": 1.0696094217079686, "learning_rate": 8.961765825434304e-06, "loss": 0.5797, "step": 3634 }, { "epoch": 0.23, "grad_norm": 1.4816392953114803, "learning_rate": 8.96113337643918e-06, "loss": 0.67, "step": 3635 }, { "epoch": 0.23, "grad_norm": 1.6660704429390816, "learning_rate": 8.960500757202869e-06, "loss": 0.7542, "step": 3636 }, { "epoch": 0.23, "grad_norm": 1.3697500567447913, "learning_rate": 8.959867967752556e-06, "loss": 0.7096, "step": 3637 }, { "epoch": 0.23, "grad_norm": 1.380083785575489, "learning_rate": 8.95923500811544e-06, "loss": 0.6888, "step": 3638 }, { "epoch": 0.23, "grad_norm": 1.5205805015105007, "learning_rate": 8.95860187831872e-06, "loss": 0.7591, "step": 3639 }, { "epoch": 0.23, "grad_norm": 1.8661963096723986, "learning_rate": 8.957968578389613e-06, "loss": 0.7855, "step": 3640 }, { "epoch": 0.23, "grad_norm": 1.5872398270319683, "learning_rate": 8.957335108355332e-06, "loss": 0.7182, "step": 3641 }, { "epoch": 0.23, "grad_norm": 0.9911448226920064, "learning_rate": 8.956701468243103e-06, "loss": 0.6584, "step": 3642 }, { "epoch": 0.23, "grad_norm": 1.7257091603357657, "learning_rate": 8.956067658080158e-06, "loss": 0.8, "step": 3643 }, { "epoch": 0.23, "grad_norm": 1.549061892428533, "learning_rate": 8.955433677893742e-06, "loss": 0.6147, "step": 3644 }, { "epoch": 0.23, "grad_norm": 1.7154070074843597, "learning_rate": 8.954799527711094e-06, "loss": 0.6504, "step": 3645 }, { "epoch": 0.23, "grad_norm": 1.7707174770340186, "learning_rate": 8.954165207559475e-06, "loss": 0.7833, "step": 3646 }, { "epoch": 0.23, "grad_norm": 1.534719872692676, "learning_rate": 8.953530717466143e-06, "loss": 0.7972, "step": 3647 }, { "epoch": 0.23, "grad_norm": 2.2386544144470832, "learning_rate": 8.952896057458368e-06, "loss": 0.652, "step": 3648 }, { "epoch": 0.23, "grad_norm": 1.5698557593309939, "learning_rate": 8.952261227563427e-06, "loss": 0.7902, "step": 3649 }, { "epoch": 0.23, "grad_norm": 1.694846598976274, "learning_rate": 8.951626227808603e-06, "loss": 0.7522, "step": 3650 }, { "epoch": 0.23, "grad_norm": 1.192841706291261, "learning_rate": 8.950991058221187e-06, "loss": 0.7188, "step": 3651 }, { "epoch": 0.23, "grad_norm": 2.425817418598619, "learning_rate": 8.95035571882848e-06, "loss": 0.6916, "step": 3652 }, { "epoch": 0.23, "grad_norm": 1.891195760379044, "learning_rate": 8.949720209657781e-06, "loss": 0.7134, "step": 3653 }, { "epoch": 0.23, "grad_norm": 1.5332192623952587, "learning_rate": 8.94908453073641e-06, "loss": 0.6404, "step": 3654 }, { "epoch": 0.23, "grad_norm": 1.6238316723144748, "learning_rate": 8.948448682091684e-06, "loss": 0.8084, "step": 3655 }, { "epoch": 0.23, "grad_norm": 1.2446320475713388, "learning_rate": 8.947812663750929e-06, "loss": 0.5997, "step": 3656 }, { "epoch": 0.23, "grad_norm": 1.777349055437544, "learning_rate": 8.947176475741482e-06, "loss": 0.7362, "step": 3657 }, { "epoch": 0.23, "grad_norm": 1.5599278302218624, "learning_rate": 8.946540118090685e-06, "loss": 0.7142, "step": 3658 }, { "epoch": 0.23, "grad_norm": 1.9333954788878915, "learning_rate": 8.945903590825884e-06, "loss": 0.6594, "step": 3659 }, { "epoch": 0.23, "grad_norm": 1.122846764577312, "learning_rate": 8.94526689397444e-06, "loss": 0.5941, "step": 3660 }, { "epoch": 0.23, "grad_norm": 1.3644494962909675, "learning_rate": 8.944630027563714e-06, "loss": 0.7453, "step": 3661 }, { "epoch": 0.23, "grad_norm": 1.594692696927297, "learning_rate": 8.94399299162108e-06, "loss": 0.7465, "step": 3662 }, { "epoch": 0.23, "grad_norm": 1.6637593225148475, "learning_rate": 8.943355786173913e-06, "loss": 0.7781, "step": 3663 }, { "epoch": 0.23, "grad_norm": 1.6188087571208303, "learning_rate": 8.942718411249601e-06, "loss": 0.7643, "step": 3664 }, { "epoch": 0.23, "grad_norm": 1.8788705933466998, "learning_rate": 8.942080866875538e-06, "loss": 0.776, "step": 3665 }, { "epoch": 0.23, "grad_norm": 1.745083443467094, "learning_rate": 8.941443153079121e-06, "loss": 0.7488, "step": 3666 }, { "epoch": 0.23, "grad_norm": 1.5595963717884744, "learning_rate": 8.94080526988776e-06, "loss": 0.7053, "step": 3667 }, { "epoch": 0.23, "grad_norm": 1.6111971266729406, "learning_rate": 8.940167217328869e-06, "loss": 0.7177, "step": 3668 }, { "epoch": 0.23, "grad_norm": 2.0430665080208055, "learning_rate": 8.93952899542987e-06, "loss": 0.7589, "step": 3669 }, { "epoch": 0.23, "grad_norm": 1.6455066947968968, "learning_rate": 8.938890604218193e-06, "loss": 0.8058, "step": 3670 }, { "epoch": 0.23, "grad_norm": 1.5188059914941383, "learning_rate": 8.938252043721275e-06, "loss": 0.6814, "step": 3671 }, { "epoch": 0.24, "grad_norm": 1.467246348686673, "learning_rate": 8.93761331396656e-06, "loss": 0.653, "step": 3672 }, { "epoch": 0.24, "grad_norm": 1.5175048887212508, "learning_rate": 8.936974414981498e-06, "loss": 0.7072, "step": 3673 }, { "epoch": 0.24, "grad_norm": 1.5327543506624035, "learning_rate": 8.93633534679355e-06, "loss": 0.6974, "step": 3674 }, { "epoch": 0.24, "grad_norm": 1.4299440906563925, "learning_rate": 8.935696109430178e-06, "loss": 0.6663, "step": 3675 }, { "epoch": 0.24, "grad_norm": 1.6990539071784576, "learning_rate": 8.935056702918858e-06, "loss": 0.7124, "step": 3676 }, { "epoch": 0.24, "grad_norm": 1.5167635332638374, "learning_rate": 8.93441712728707e-06, "loss": 0.6692, "step": 3677 }, { "epoch": 0.24, "grad_norm": 1.3085820703982285, "learning_rate": 8.933777382562301e-06, "loss": 0.6435, "step": 3678 }, { "epoch": 0.24, "grad_norm": 1.6544238402718996, "learning_rate": 8.933137468772047e-06, "loss": 0.7264, "step": 3679 }, { "epoch": 0.24, "grad_norm": 1.253836144498713, "learning_rate": 8.932497385943806e-06, "loss": 0.6893, "step": 3680 }, { "epoch": 0.24, "grad_norm": 1.7162509974912081, "learning_rate": 8.931857134105093e-06, "loss": 0.7419, "step": 3681 }, { "epoch": 0.24, "grad_norm": 1.4509536148136202, "learning_rate": 8.931216713283423e-06, "loss": 0.744, "step": 3682 }, { "epoch": 0.24, "grad_norm": 1.7167216681128974, "learning_rate": 8.930576123506318e-06, "loss": 0.7835, "step": 3683 }, { "epoch": 0.24, "grad_norm": 1.5172163604618647, "learning_rate": 8.929935364801312e-06, "loss": 0.7377, "step": 3684 }, { "epoch": 0.24, "grad_norm": 1.5318080283462634, "learning_rate": 8.929294437195942e-06, "loss": 0.6632, "step": 3685 }, { "epoch": 0.24, "grad_norm": 1.4842046561561115, "learning_rate": 8.928653340717752e-06, "loss": 0.7503, "step": 3686 }, { "epoch": 0.24, "grad_norm": 1.5487424210560317, "learning_rate": 8.928012075394297e-06, "loss": 0.6803, "step": 3687 }, { "epoch": 0.24, "grad_norm": 1.2083543603632467, "learning_rate": 8.927370641253137e-06, "loss": 0.6761, "step": 3688 }, { "epoch": 0.24, "grad_norm": 1.7675702802567017, "learning_rate": 8.92672903832184e-06, "loss": 0.7707, "step": 3689 }, { "epoch": 0.24, "grad_norm": 1.3703723103687575, "learning_rate": 8.92608726662798e-06, "loss": 0.7392, "step": 3690 }, { "epoch": 0.24, "grad_norm": 1.6842452931202456, "learning_rate": 8.925445326199138e-06, "loss": 0.8096, "step": 3691 }, { "epoch": 0.24, "grad_norm": 1.4461280148982119, "learning_rate": 8.924803217062906e-06, "loss": 0.7551, "step": 3692 }, { "epoch": 0.24, "grad_norm": 1.0431473473041044, "learning_rate": 8.924160939246877e-06, "loss": 0.6746, "step": 3693 }, { "epoch": 0.24, "grad_norm": 1.6350227925310037, "learning_rate": 8.923518492778659e-06, "loss": 0.7459, "step": 3694 }, { "epoch": 0.24, "grad_norm": 1.8264315218068896, "learning_rate": 8.922875877685859e-06, "loss": 0.6552, "step": 3695 }, { "epoch": 0.24, "grad_norm": 1.468179102685142, "learning_rate": 8.922233093996098e-06, "loss": 0.7021, "step": 3696 }, { "epoch": 0.24, "grad_norm": 1.4731566689961126, "learning_rate": 8.921590141737e-06, "loss": 0.774, "step": 3697 }, { "epoch": 0.24, "grad_norm": 1.4918094309401686, "learning_rate": 8.920947020936196e-06, "loss": 0.686, "step": 3698 }, { "epoch": 0.24, "grad_norm": 1.7144302133761482, "learning_rate": 8.920303731621332e-06, "loss": 0.7594, "step": 3699 }, { "epoch": 0.24, "grad_norm": 1.3945870201733308, "learning_rate": 8.919660273820047e-06, "loss": 0.65, "step": 3700 }, { "epoch": 0.24, "grad_norm": 1.7554450342391075, "learning_rate": 8.919016647560003e-06, "loss": 0.7972, "step": 3701 }, { "epoch": 0.24, "grad_norm": 1.487867218254953, "learning_rate": 8.918372852868858e-06, "loss": 0.6716, "step": 3702 }, { "epoch": 0.24, "grad_norm": 1.5472652990993108, "learning_rate": 8.91772888977428e-06, "loss": 0.7413, "step": 3703 }, { "epoch": 0.24, "grad_norm": 1.5417888658314618, "learning_rate": 8.917084758303948e-06, "loss": 0.7584, "step": 3704 }, { "epoch": 0.24, "grad_norm": 1.4990416657229677, "learning_rate": 8.916440458485542e-06, "loss": 0.6818, "step": 3705 }, { "epoch": 0.24, "grad_norm": 1.4881423579808217, "learning_rate": 8.915795990346758e-06, "loss": 0.8176, "step": 3706 }, { "epoch": 0.24, "grad_norm": 1.4399860501704238, "learning_rate": 8.915151353915288e-06, "loss": 0.6077, "step": 3707 }, { "epoch": 0.24, "grad_norm": 1.5855462753281622, "learning_rate": 8.91450654921884e-06, "loss": 0.796, "step": 3708 }, { "epoch": 0.24, "grad_norm": 1.4529408366349084, "learning_rate": 8.913861576285126e-06, "loss": 0.7063, "step": 3709 }, { "epoch": 0.24, "grad_norm": 2.0151017504036157, "learning_rate": 8.913216435141867e-06, "loss": 0.7992, "step": 3710 }, { "epoch": 0.24, "grad_norm": 1.6250844363034571, "learning_rate": 8.912571125816787e-06, "loss": 0.7906, "step": 3711 }, { "epoch": 0.24, "grad_norm": 1.5079536634022057, "learning_rate": 8.911925648337622e-06, "loss": 0.6665, "step": 3712 }, { "epoch": 0.24, "grad_norm": 1.7349618391058454, "learning_rate": 8.911280002732112e-06, "loss": 0.7588, "step": 3713 }, { "epoch": 0.24, "grad_norm": 1.0887727627366999, "learning_rate": 8.910634189028006e-06, "loss": 0.7033, "step": 3714 }, { "epoch": 0.24, "grad_norm": 1.484357622151063, "learning_rate": 8.909988207253062e-06, "loss": 0.7066, "step": 3715 }, { "epoch": 0.24, "grad_norm": 1.7407222134304199, "learning_rate": 8.90934205743504e-06, "loss": 0.6962, "step": 3716 }, { "epoch": 0.24, "grad_norm": 1.6381575601150202, "learning_rate": 8.90869573960171e-06, "loss": 0.7507, "step": 3717 }, { "epoch": 0.24, "grad_norm": 1.6590121492348648, "learning_rate": 8.90804925378085e-06, "loss": 0.7264, "step": 3718 }, { "epoch": 0.24, "grad_norm": 1.3514820238141934, "learning_rate": 8.907402600000248e-06, "loss": 0.6653, "step": 3719 }, { "epoch": 0.24, "grad_norm": 1.7972031397751802, "learning_rate": 8.90675577828769e-06, "loss": 0.7598, "step": 3720 }, { "epoch": 0.24, "grad_norm": 1.6430498436263128, "learning_rate": 8.906108788670979e-06, "loss": 0.7819, "step": 3721 }, { "epoch": 0.24, "grad_norm": 1.727515214709708, "learning_rate": 8.90546163117792e-06, "loss": 0.6939, "step": 3722 }, { "epoch": 0.24, "grad_norm": 1.6000670150683196, "learning_rate": 8.904814305836326e-06, "loss": 0.6353, "step": 3723 }, { "epoch": 0.24, "grad_norm": 1.734920736678378, "learning_rate": 8.904166812674019e-06, "loss": 0.6591, "step": 3724 }, { "epoch": 0.24, "grad_norm": 1.5554988216886767, "learning_rate": 8.903519151718826e-06, "loss": 0.6894, "step": 3725 }, { "epoch": 0.24, "grad_norm": 1.400221616085427, "learning_rate": 8.902871322998582e-06, "loss": 0.7591, "step": 3726 }, { "epoch": 0.24, "grad_norm": 1.590323356545595, "learning_rate": 8.90222332654113e-06, "loss": 0.7663, "step": 3727 }, { "epoch": 0.24, "grad_norm": 1.3123590539599095, "learning_rate": 8.901575162374318e-06, "loss": 0.776, "step": 3728 }, { "epoch": 0.24, "grad_norm": 1.488395810459498, "learning_rate": 8.900926830526006e-06, "loss": 0.621, "step": 3729 }, { "epoch": 0.24, "grad_norm": 1.8339101583307496, "learning_rate": 8.900278331024055e-06, "loss": 0.8228, "step": 3730 }, { "epoch": 0.24, "grad_norm": 1.51481729013332, "learning_rate": 8.899629663896336e-06, "loss": 0.6968, "step": 3731 }, { "epoch": 0.24, "grad_norm": 1.871268198853193, "learning_rate": 8.89898082917073e-06, "loss": 0.5946, "step": 3732 }, { "epoch": 0.24, "grad_norm": 1.5275614546322716, "learning_rate": 8.898331826875119e-06, "loss": 0.6807, "step": 3733 }, { "epoch": 0.24, "grad_norm": 1.466098096577545, "learning_rate": 8.897682657037398e-06, "loss": 0.6721, "step": 3734 }, { "epoch": 0.24, "grad_norm": 1.43128411179379, "learning_rate": 8.897033319685466e-06, "loss": 0.7204, "step": 3735 }, { "epoch": 0.24, "grad_norm": 1.6394636753018101, "learning_rate": 8.896383814847232e-06, "loss": 0.6902, "step": 3736 }, { "epoch": 0.24, "grad_norm": 1.1246845578484785, "learning_rate": 8.89573414255061e-06, "loss": 0.6585, "step": 3737 }, { "epoch": 0.24, "grad_norm": 1.4040850239466793, "learning_rate": 8.895084302823518e-06, "loss": 0.6125, "step": 3738 }, { "epoch": 0.24, "grad_norm": 1.0013184437641098, "learning_rate": 8.89443429569389e-06, "loss": 0.656, "step": 3739 }, { "epoch": 0.24, "grad_norm": 1.6451711527867825, "learning_rate": 8.893784121189656e-06, "loss": 0.7192, "step": 3740 }, { "epoch": 0.24, "grad_norm": 1.553082826898479, "learning_rate": 8.893133779338765e-06, "loss": 0.7072, "step": 3741 }, { "epoch": 0.24, "grad_norm": 1.6005785248959152, "learning_rate": 8.892483270169165e-06, "loss": 0.7386, "step": 3742 }, { "epoch": 0.24, "grad_norm": 1.4913488284918641, "learning_rate": 8.89183259370881e-06, "loss": 0.6623, "step": 3743 }, { "epoch": 0.24, "grad_norm": 1.455383967604706, "learning_rate": 8.891181749985672e-06, "loss": 0.7443, "step": 3744 }, { "epoch": 0.24, "grad_norm": 1.6872891211763785, "learning_rate": 8.890530739027718e-06, "loss": 0.7452, "step": 3745 }, { "epoch": 0.24, "grad_norm": 1.4786143811134724, "learning_rate": 8.889879560862926e-06, "loss": 0.7069, "step": 3746 }, { "epoch": 0.24, "grad_norm": 1.7628860480347706, "learning_rate": 8.889228215519286e-06, "loss": 0.7158, "step": 3747 }, { "epoch": 0.24, "grad_norm": 1.4455136979772458, "learning_rate": 8.888576703024789e-06, "loss": 0.7213, "step": 3748 }, { "epoch": 0.24, "grad_norm": 1.8377191241338344, "learning_rate": 8.887925023407437e-06, "loss": 0.7379, "step": 3749 }, { "epoch": 0.24, "grad_norm": 1.5755858677286647, "learning_rate": 8.887273176695237e-06, "loss": 0.7224, "step": 3750 }, { "epoch": 0.24, "grad_norm": 1.7481568743845328, "learning_rate": 8.886621162916204e-06, "loss": 0.8061, "step": 3751 }, { "epoch": 0.24, "grad_norm": 1.4472821237389188, "learning_rate": 8.88596898209836e-06, "loss": 0.6474, "step": 3752 }, { "epoch": 0.24, "grad_norm": 1.3704205075368543, "learning_rate": 8.885316634269735e-06, "loss": 0.6909, "step": 3753 }, { "epoch": 0.24, "grad_norm": 1.5474567231624763, "learning_rate": 8.884664119458366e-06, "loss": 0.6903, "step": 3754 }, { "epoch": 0.24, "grad_norm": 1.6724769900238097, "learning_rate": 8.884011437692295e-06, "loss": 0.7341, "step": 3755 }, { "epoch": 0.24, "grad_norm": 1.6208016973089887, "learning_rate": 8.883358588999573e-06, "loss": 0.7139, "step": 3756 }, { "epoch": 0.24, "grad_norm": 1.393822658734477, "learning_rate": 8.882705573408258e-06, "loss": 0.6353, "step": 3757 }, { "epoch": 0.24, "grad_norm": 1.5625844093340464, "learning_rate": 8.88205239094642e-06, "loss": 0.6519, "step": 3758 }, { "epoch": 0.24, "grad_norm": 1.5973588465510904, "learning_rate": 8.881399041642125e-06, "loss": 0.795, "step": 3759 }, { "epoch": 0.24, "grad_norm": 1.634779716044831, "learning_rate": 8.880745525523455e-06, "loss": 0.7049, "step": 3760 }, { "epoch": 0.24, "grad_norm": 1.5539249474211088, "learning_rate": 8.880091842618498e-06, "loss": 0.702, "step": 3761 }, { "epoch": 0.24, "grad_norm": 1.510786330619006, "learning_rate": 8.879437992955346e-06, "loss": 0.6646, "step": 3762 }, { "epoch": 0.24, "grad_norm": 1.6049113587136754, "learning_rate": 8.878783976562102e-06, "loss": 0.666, "step": 3763 }, { "epoch": 0.24, "grad_norm": 1.5512259969702853, "learning_rate": 8.878129793466872e-06, "loss": 0.7246, "step": 3764 }, { "epoch": 0.24, "grad_norm": 1.809397204709126, "learning_rate": 8.877475443697773e-06, "loss": 0.706, "step": 3765 }, { "epoch": 0.24, "grad_norm": 1.4895860843837982, "learning_rate": 8.876820927282928e-06, "loss": 0.7313, "step": 3766 }, { "epoch": 0.24, "grad_norm": 1.547494181517237, "learning_rate": 8.876166244250463e-06, "loss": 0.682, "step": 3767 }, { "epoch": 0.24, "grad_norm": 1.5274333233940667, "learning_rate": 8.87551139462852e-06, "loss": 0.7304, "step": 3768 }, { "epoch": 0.24, "grad_norm": 1.482791512234593, "learning_rate": 8.87485637844524e-06, "loss": 0.7275, "step": 3769 }, { "epoch": 0.24, "grad_norm": 1.4320438881354396, "learning_rate": 8.874201195728777e-06, "loss": 0.724, "step": 3770 }, { "epoch": 0.24, "grad_norm": 1.6355686399911082, "learning_rate": 8.873545846507286e-06, "loss": 0.6725, "step": 3771 }, { "epoch": 0.24, "grad_norm": 1.452536507933515, "learning_rate": 8.872890330808933e-06, "loss": 0.7161, "step": 3772 }, { "epoch": 0.24, "grad_norm": 1.113876557459477, "learning_rate": 8.872234648661893e-06, "loss": 0.7331, "step": 3773 }, { "epoch": 0.24, "grad_norm": 1.4963802757654858, "learning_rate": 8.871578800094345e-06, "loss": 0.6787, "step": 3774 }, { "epoch": 0.24, "grad_norm": 1.7542105045514766, "learning_rate": 8.870922785134473e-06, "loss": 0.776, "step": 3775 }, { "epoch": 0.24, "grad_norm": 1.8078977881638323, "learning_rate": 8.870266603810476e-06, "loss": 0.7413, "step": 3776 }, { "epoch": 0.24, "grad_norm": 1.5945853447415665, "learning_rate": 8.869610256150552e-06, "loss": 0.6969, "step": 3777 }, { "epoch": 0.24, "grad_norm": 1.6243444474243878, "learning_rate": 8.86895374218291e-06, "loss": 0.6686, "step": 3778 }, { "epoch": 0.24, "grad_norm": 1.04083096659626, "learning_rate": 8.868297061935767e-06, "loss": 0.6934, "step": 3779 }, { "epoch": 0.24, "grad_norm": 1.824477401079771, "learning_rate": 8.867640215437344e-06, "loss": 0.8439, "step": 3780 }, { "epoch": 0.24, "grad_norm": 1.7089752835599246, "learning_rate": 8.86698320271587e-06, "loss": 0.6567, "step": 3781 }, { "epoch": 0.24, "grad_norm": 1.561602277009136, "learning_rate": 8.866326023799586e-06, "loss": 0.7264, "step": 3782 }, { "epoch": 0.24, "grad_norm": 1.0030896414237616, "learning_rate": 8.865668678716734e-06, "loss": 0.6348, "step": 3783 }, { "epoch": 0.24, "grad_norm": 1.6569816676329023, "learning_rate": 8.865011167495564e-06, "loss": 0.7326, "step": 3784 }, { "epoch": 0.24, "grad_norm": 1.6658889681086448, "learning_rate": 8.864353490164335e-06, "loss": 0.7218, "step": 3785 }, { "epoch": 0.24, "grad_norm": 1.6276397330931158, "learning_rate": 8.863695646751313e-06, "loss": 0.7591, "step": 3786 }, { "epoch": 0.24, "grad_norm": 1.2940932297338357, "learning_rate": 8.863037637284773e-06, "loss": 0.6545, "step": 3787 }, { "epoch": 0.24, "grad_norm": 1.514642009139456, "learning_rate": 8.862379461792992e-06, "loss": 0.7478, "step": 3788 }, { "epoch": 0.24, "grad_norm": 1.8100237033938664, "learning_rate": 8.861721120304257e-06, "loss": 0.7171, "step": 3789 }, { "epoch": 0.24, "grad_norm": 1.6745483160235706, "learning_rate": 8.861062612846862e-06, "loss": 0.6966, "step": 3790 }, { "epoch": 0.24, "grad_norm": 1.3262846402055712, "learning_rate": 8.86040393944911e-06, "loss": 0.7925, "step": 3791 }, { "epoch": 0.24, "grad_norm": 1.1165363235111236, "learning_rate": 8.859745100139307e-06, "loss": 0.6309, "step": 3792 }, { "epoch": 0.24, "grad_norm": 1.6053542904334026, "learning_rate": 8.859086094945773e-06, "loss": 0.7501, "step": 3793 }, { "epoch": 0.24, "grad_norm": 1.5738779947313015, "learning_rate": 8.858426923896826e-06, "loss": 0.7146, "step": 3794 }, { "epoch": 0.24, "grad_norm": 1.5941262567942427, "learning_rate": 8.857767587020798e-06, "loss": 0.7675, "step": 3795 }, { "epoch": 0.24, "grad_norm": 2.026147431998919, "learning_rate": 8.857108084346025e-06, "loss": 0.7045, "step": 3796 }, { "epoch": 0.24, "grad_norm": 1.2320182304326075, "learning_rate": 8.856448415900851e-06, "loss": 0.6037, "step": 3797 }, { "epoch": 0.24, "grad_norm": 1.717518587841148, "learning_rate": 8.855788581713629e-06, "loss": 0.7491, "step": 3798 }, { "epoch": 0.24, "grad_norm": 1.4916627796496922, "learning_rate": 8.855128581812714e-06, "loss": 0.8052, "step": 3799 }, { "epoch": 0.24, "grad_norm": 1.5783505335993249, "learning_rate": 8.854468416226473e-06, "loss": 0.7077, "step": 3800 }, { "epoch": 0.24, "grad_norm": 1.3892960314665344, "learning_rate": 8.853808084983282e-06, "loss": 0.6966, "step": 3801 }, { "epoch": 0.24, "grad_norm": 1.4321048020817164, "learning_rate": 8.853147588111515e-06, "loss": 0.6615, "step": 3802 }, { "epoch": 0.24, "grad_norm": 1.399866925541836, "learning_rate": 8.852486925639562e-06, "loss": 0.6648, "step": 3803 }, { "epoch": 0.24, "grad_norm": 1.7731339452448194, "learning_rate": 8.851826097595815e-06, "loss": 0.7032, "step": 3804 }, { "epoch": 0.24, "grad_norm": 1.8578518783925202, "learning_rate": 8.851165104008678e-06, "loss": 0.7314, "step": 3805 }, { "epoch": 0.24, "grad_norm": 1.5270347589247444, "learning_rate": 8.850503944906557e-06, "loss": 0.6703, "step": 3806 }, { "epoch": 0.24, "grad_norm": 1.5507753120127816, "learning_rate": 8.849842620317865e-06, "loss": 0.7144, "step": 3807 }, { "epoch": 0.24, "grad_norm": 1.6783191804317654, "learning_rate": 8.84918113027103e-06, "loss": 0.6167, "step": 3808 }, { "epoch": 0.24, "grad_norm": 1.2298365679876355, "learning_rate": 8.848519474794476e-06, "loss": 0.6181, "step": 3809 }, { "epoch": 0.24, "grad_norm": 1.5959159964475398, "learning_rate": 8.847857653916643e-06, "loss": 0.784, "step": 3810 }, { "epoch": 0.24, "grad_norm": 1.3911388803020284, "learning_rate": 8.847195667665974e-06, "loss": 0.6515, "step": 3811 }, { "epoch": 0.24, "grad_norm": 1.5876174101885416, "learning_rate": 8.84653351607092e-06, "loss": 0.7342, "step": 3812 }, { "epoch": 0.24, "grad_norm": 1.4954646293531446, "learning_rate": 8.845871199159935e-06, "loss": 0.6897, "step": 3813 }, { "epoch": 0.24, "grad_norm": 1.5588614716787277, "learning_rate": 8.845208716961489e-06, "loss": 0.7152, "step": 3814 }, { "epoch": 0.24, "grad_norm": 1.5302873803260915, "learning_rate": 8.844546069504051e-06, "loss": 0.701, "step": 3815 }, { "epoch": 0.24, "grad_norm": 1.4016701387222967, "learning_rate": 8.843883256816104e-06, "loss": 0.6579, "step": 3816 }, { "epoch": 0.24, "grad_norm": 1.6211183258720243, "learning_rate": 8.843220278926128e-06, "loss": 0.8171, "step": 3817 }, { "epoch": 0.24, "grad_norm": 1.6539171930924663, "learning_rate": 8.842557135862624e-06, "loss": 0.8559, "step": 3818 }, { "epoch": 0.24, "grad_norm": 1.4586170006442718, "learning_rate": 8.841893827654087e-06, "loss": 0.7235, "step": 3819 }, { "epoch": 0.24, "grad_norm": 1.6431230386917695, "learning_rate": 8.841230354329026e-06, "loss": 0.7946, "step": 3820 }, { "epoch": 0.24, "grad_norm": 1.5110917472380063, "learning_rate": 8.840566715915955e-06, "loss": 0.7282, "step": 3821 }, { "epoch": 0.24, "grad_norm": 1.5656270705196804, "learning_rate": 8.839902912443398e-06, "loss": 0.7027, "step": 3822 }, { "epoch": 0.24, "grad_norm": 1.315213732759046, "learning_rate": 8.839238943939883e-06, "loss": 0.7408, "step": 3823 }, { "epoch": 0.24, "grad_norm": 1.2846098875339926, "learning_rate": 8.838574810433945e-06, "loss": 0.707, "step": 3824 }, { "epoch": 0.24, "grad_norm": 1.5797818409168143, "learning_rate": 8.837910511954128e-06, "loss": 0.7945, "step": 3825 }, { "epoch": 0.24, "grad_norm": 1.566148178402512, "learning_rate": 8.83724604852898e-06, "loss": 0.6879, "step": 3826 }, { "epoch": 0.24, "grad_norm": 1.6477227116531212, "learning_rate": 8.836581420187062e-06, "loss": 0.7563, "step": 3827 }, { "epoch": 0.25, "grad_norm": 1.47259251057982, "learning_rate": 8.835916626956935e-06, "loss": 0.7952, "step": 3828 }, { "epoch": 0.25, "grad_norm": 1.5904255673420304, "learning_rate": 8.835251668867172e-06, "loss": 0.6832, "step": 3829 }, { "epoch": 0.25, "grad_norm": 1.4689805700622156, "learning_rate": 8.834586545946353e-06, "loss": 0.752, "step": 3830 }, { "epoch": 0.25, "grad_norm": 1.5133200018974582, "learning_rate": 8.833921258223059e-06, "loss": 0.6612, "step": 3831 }, { "epoch": 0.25, "grad_norm": 1.2574913047602283, "learning_rate": 8.833255805725887e-06, "loss": 0.662, "step": 3832 }, { "epoch": 0.25, "grad_norm": 1.583446742222271, "learning_rate": 8.832590188483437e-06, "loss": 0.7211, "step": 3833 }, { "epoch": 0.25, "grad_norm": 1.196458026978178, "learning_rate": 8.831924406524312e-06, "loss": 0.5682, "step": 3834 }, { "epoch": 0.25, "grad_norm": 1.1236264840899024, "learning_rate": 8.831258459877128e-06, "loss": 0.632, "step": 3835 }, { "epoch": 0.25, "grad_norm": 1.7244952075422215, "learning_rate": 8.830592348570505e-06, "loss": 0.6836, "step": 3836 }, { "epoch": 0.25, "grad_norm": 1.6381701429215652, "learning_rate": 8.829926072633075e-06, "loss": 0.8482, "step": 3837 }, { "epoch": 0.25, "grad_norm": 1.9686665714294533, "learning_rate": 8.829259632093468e-06, "loss": 0.7974, "step": 3838 }, { "epoch": 0.25, "grad_norm": 1.6894903629212152, "learning_rate": 8.828593026980328e-06, "loss": 0.8145, "step": 3839 }, { "epoch": 0.25, "grad_norm": 1.7560454089604598, "learning_rate": 8.827926257322306e-06, "loss": 0.7871, "step": 3840 }, { "epoch": 0.25, "grad_norm": 2.099701698269772, "learning_rate": 8.827259323148056e-06, "loss": 0.7078, "step": 3841 }, { "epoch": 0.25, "grad_norm": 1.5848255962411255, "learning_rate": 8.826592224486243e-06, "loss": 0.6575, "step": 3842 }, { "epoch": 0.25, "grad_norm": 1.7288923373077665, "learning_rate": 8.825924961365538e-06, "loss": 0.7068, "step": 3843 }, { "epoch": 0.25, "grad_norm": 1.6541945524783563, "learning_rate": 8.825257533814614e-06, "loss": 0.7594, "step": 3844 }, { "epoch": 0.25, "grad_norm": 1.5857696069935105, "learning_rate": 8.824589941862164e-06, "loss": 0.6515, "step": 3845 }, { "epoch": 0.25, "grad_norm": 1.494534085947674, "learning_rate": 8.823922185536872e-06, "loss": 0.7016, "step": 3846 }, { "epoch": 0.25, "grad_norm": 1.6492889849503, "learning_rate": 8.82325426486744e-06, "loss": 0.6912, "step": 3847 }, { "epoch": 0.25, "grad_norm": 1.4655117654274006, "learning_rate": 8.822586179882574e-06, "loss": 0.6873, "step": 3848 }, { "epoch": 0.25, "grad_norm": 2.21364064072475, "learning_rate": 8.821917930610987e-06, "loss": 0.8015, "step": 3849 }, { "epoch": 0.25, "grad_norm": 1.4063734918514947, "learning_rate": 8.821249517081397e-06, "loss": 0.675, "step": 3850 }, { "epoch": 0.25, "grad_norm": 1.7097590237768419, "learning_rate": 8.820580939322532e-06, "loss": 0.7306, "step": 3851 }, { "epoch": 0.25, "grad_norm": 1.5409585945848419, "learning_rate": 8.819912197363128e-06, "loss": 0.6491, "step": 3852 }, { "epoch": 0.25, "grad_norm": 2.0207710457014505, "learning_rate": 8.819243291231922e-06, "loss": 0.6178, "step": 3853 }, { "epoch": 0.25, "grad_norm": 1.5446490125701329, "learning_rate": 8.818574220957666e-06, "loss": 0.7338, "step": 3854 }, { "epoch": 0.25, "grad_norm": 1.88308375734266, "learning_rate": 8.817904986569115e-06, "loss": 0.7536, "step": 3855 }, { "epoch": 0.25, "grad_norm": 1.7255957707606735, "learning_rate": 8.81723558809503e-06, "loss": 0.6876, "step": 3856 }, { "epoch": 0.25, "grad_norm": 1.3888373253079853, "learning_rate": 8.81656602556418e-06, "loss": 0.7105, "step": 3857 }, { "epoch": 0.25, "grad_norm": 1.617506144584485, "learning_rate": 8.81589629900534e-06, "loss": 0.7165, "step": 3858 }, { "epoch": 0.25, "grad_norm": 1.4851587769958023, "learning_rate": 8.815226408447298e-06, "loss": 0.7709, "step": 3859 }, { "epoch": 0.25, "grad_norm": 1.773270298214151, "learning_rate": 8.81455635391884e-06, "loss": 0.8111, "step": 3860 }, { "epoch": 0.25, "grad_norm": 1.5882520305326828, "learning_rate": 8.813886135448766e-06, "loss": 0.7472, "step": 3861 }, { "epoch": 0.25, "grad_norm": 1.6116282027094948, "learning_rate": 8.813215753065882e-06, "loss": 0.706, "step": 3862 }, { "epoch": 0.25, "grad_norm": 1.3498106785096136, "learning_rate": 8.812545206798995e-06, "loss": 0.7085, "step": 3863 }, { "epoch": 0.25, "grad_norm": 1.577517447455562, "learning_rate": 8.811874496676927e-06, "loss": 0.7797, "step": 3864 }, { "epoch": 0.25, "grad_norm": 1.4879533920721746, "learning_rate": 8.811203622728504e-06, "loss": 0.6964, "step": 3865 }, { "epoch": 0.25, "grad_norm": 1.9762432536214158, "learning_rate": 8.810532584982557e-06, "loss": 0.7723, "step": 3866 }, { "epoch": 0.25, "grad_norm": 1.5102516329547955, "learning_rate": 8.809861383467926e-06, "loss": 0.6139, "step": 3867 }, { "epoch": 0.25, "grad_norm": 1.1618825031178381, "learning_rate": 8.80919001821346e-06, "loss": 0.6997, "step": 3868 }, { "epoch": 0.25, "grad_norm": 1.0761371265846675, "learning_rate": 8.808518489248009e-06, "loss": 0.6504, "step": 3869 }, { "epoch": 0.25, "grad_norm": 1.5558104135185271, "learning_rate": 8.807846796600436e-06, "loss": 0.7281, "step": 3870 }, { "epoch": 0.25, "grad_norm": 1.904942492771743, "learning_rate": 8.807174940299613e-06, "loss": 0.6982, "step": 3871 }, { "epoch": 0.25, "grad_norm": 1.6480273702559065, "learning_rate": 8.806502920374406e-06, "loss": 0.7272, "step": 3872 }, { "epoch": 0.25, "grad_norm": 1.7146214314285668, "learning_rate": 8.805830736853705e-06, "loss": 0.8162, "step": 3873 }, { "epoch": 0.25, "grad_norm": 1.4477441998694955, "learning_rate": 8.805158389766395e-06, "loss": 0.6537, "step": 3874 }, { "epoch": 0.25, "grad_norm": 1.4972798273471448, "learning_rate": 8.804485879141375e-06, "loss": 0.6889, "step": 3875 }, { "epoch": 0.25, "grad_norm": 1.5510636633654435, "learning_rate": 8.803813205007544e-06, "loss": 0.6745, "step": 3876 }, { "epoch": 0.25, "grad_norm": 1.0296226342654182, "learning_rate": 8.803140367393815e-06, "loss": 0.5714, "step": 3877 }, { "epoch": 0.25, "grad_norm": 1.3482252902879932, "learning_rate": 8.802467366329106e-06, "loss": 0.6555, "step": 3878 }, { "epoch": 0.25, "grad_norm": 1.562918795398622, "learning_rate": 8.801794201842337e-06, "loss": 0.7288, "step": 3879 }, { "epoch": 0.25, "grad_norm": 1.5439517469061033, "learning_rate": 8.801120873962445e-06, "loss": 0.7367, "step": 3880 }, { "epoch": 0.25, "grad_norm": 1.6776382686082252, "learning_rate": 8.800447382718362e-06, "loss": 0.7147, "step": 3881 }, { "epoch": 0.25, "grad_norm": 1.2592952306326481, "learning_rate": 8.799773728139038e-06, "loss": 0.6295, "step": 3882 }, { "epoch": 0.25, "grad_norm": 1.4767428995155876, "learning_rate": 8.799099910253424e-06, "loss": 0.7274, "step": 3883 }, { "epoch": 0.25, "grad_norm": 1.382121240715917, "learning_rate": 8.798425929090477e-06, "loss": 0.6684, "step": 3884 }, { "epoch": 0.25, "grad_norm": 4.696155645325468, "learning_rate": 8.797751784679167e-06, "loss": 0.7354, "step": 3885 }, { "epoch": 0.25, "grad_norm": 1.1847143390729893, "learning_rate": 8.797077477048464e-06, "loss": 0.6108, "step": 3886 }, { "epoch": 0.25, "grad_norm": 1.5499777377764574, "learning_rate": 8.796403006227352e-06, "loss": 0.6827, "step": 3887 }, { "epoch": 0.25, "grad_norm": 1.633008425458606, "learning_rate": 8.795728372244813e-06, "loss": 0.7574, "step": 3888 }, { "epoch": 0.25, "grad_norm": 1.5439758890316913, "learning_rate": 8.795053575129846e-06, "loss": 0.7525, "step": 3889 }, { "epoch": 0.25, "grad_norm": 1.4974143026073903, "learning_rate": 8.794378614911452e-06, "loss": 0.6171, "step": 3890 }, { "epoch": 0.25, "grad_norm": 1.4472602170172562, "learning_rate": 8.793703491618638e-06, "loss": 0.5953, "step": 3891 }, { "epoch": 0.25, "grad_norm": 1.2231606039009106, "learning_rate": 8.793028205280419e-06, "loss": 0.6135, "step": 3892 }, { "epoch": 0.25, "grad_norm": 1.3745171784083334, "learning_rate": 8.792352755925817e-06, "loss": 0.7433, "step": 3893 }, { "epoch": 0.25, "grad_norm": 1.393380542170846, "learning_rate": 8.791677143583863e-06, "loss": 0.6828, "step": 3894 }, { "epoch": 0.25, "grad_norm": 1.548462385496319, "learning_rate": 8.791001368283593e-06, "loss": 0.7767, "step": 3895 }, { "epoch": 0.25, "grad_norm": 1.643558891033662, "learning_rate": 8.79032543005405e-06, "loss": 0.7521, "step": 3896 }, { "epoch": 0.25, "grad_norm": 1.4330073701714114, "learning_rate": 8.789649328924286e-06, "loss": 0.7108, "step": 3897 }, { "epoch": 0.25, "grad_norm": 1.5155457803919055, "learning_rate": 8.788973064923355e-06, "loss": 0.754, "step": 3898 }, { "epoch": 0.25, "grad_norm": 1.9657406606100114, "learning_rate": 8.788296638080325e-06, "loss": 0.7816, "step": 3899 }, { "epoch": 0.25, "grad_norm": 1.5296712297101434, "learning_rate": 8.787620048424264e-06, "loss": 0.6839, "step": 3900 }, { "epoch": 0.25, "grad_norm": 1.5432734086577415, "learning_rate": 8.786943295984254e-06, "loss": 0.7239, "step": 3901 }, { "epoch": 0.25, "grad_norm": 1.3537286005341054, "learning_rate": 8.786266380789377e-06, "loss": 0.7847, "step": 3902 }, { "epoch": 0.25, "grad_norm": 1.5076048405663942, "learning_rate": 8.785589302868729e-06, "loss": 0.7576, "step": 3903 }, { "epoch": 0.25, "grad_norm": 1.5096094273955716, "learning_rate": 8.784912062251405e-06, "loss": 0.7748, "step": 3904 }, { "epoch": 0.25, "grad_norm": 1.3298313277901757, "learning_rate": 8.784234658966514e-06, "loss": 0.7031, "step": 3905 }, { "epoch": 0.25, "grad_norm": 1.725168597228278, "learning_rate": 8.783557093043172e-06, "loss": 0.7358, "step": 3906 }, { "epoch": 0.25, "grad_norm": 1.200761652218746, "learning_rate": 8.782879364510494e-06, "loss": 0.6998, "step": 3907 }, { "epoch": 0.25, "grad_norm": 1.5088968999216508, "learning_rate": 8.78220147339761e-06, "loss": 0.7414, "step": 3908 }, { "epoch": 0.25, "grad_norm": 1.4333362285748004, "learning_rate": 8.781523419733655e-06, "loss": 0.6378, "step": 3909 }, { "epoch": 0.25, "grad_norm": 1.7421487419879997, "learning_rate": 8.780845203547769e-06, "loss": 0.6952, "step": 3910 }, { "epoch": 0.25, "grad_norm": 1.9738312402358646, "learning_rate": 8.7801668248691e-06, "loss": 0.7857, "step": 3911 }, { "epoch": 0.25, "grad_norm": 1.5206353487514475, "learning_rate": 8.779488283726806e-06, "loss": 0.7065, "step": 3912 }, { "epoch": 0.25, "grad_norm": 1.5274053764612059, "learning_rate": 8.778809580150044e-06, "loss": 0.6726, "step": 3913 }, { "epoch": 0.25, "grad_norm": 1.1130454236593004, "learning_rate": 8.778130714167991e-06, "loss": 0.6621, "step": 3914 }, { "epoch": 0.25, "grad_norm": 1.4341181746731164, "learning_rate": 8.777451685809817e-06, "loss": 0.6841, "step": 3915 }, { "epoch": 0.25, "grad_norm": 1.4609613514414308, "learning_rate": 8.776772495104705e-06, "loss": 0.7234, "step": 3916 }, { "epoch": 0.25, "grad_norm": 1.8284246432888163, "learning_rate": 8.77609314208185e-06, "loss": 0.6692, "step": 3917 }, { "epoch": 0.25, "grad_norm": 1.662086770555622, "learning_rate": 8.775413626770447e-06, "loss": 0.7554, "step": 3918 }, { "epoch": 0.25, "grad_norm": 1.6508748958580006, "learning_rate": 8.774733949199696e-06, "loss": 0.7316, "step": 3919 }, { "epoch": 0.25, "grad_norm": 1.6192714624734004, "learning_rate": 8.774054109398815e-06, "loss": 0.6987, "step": 3920 }, { "epoch": 0.25, "grad_norm": 1.8053142327051164, "learning_rate": 8.773374107397017e-06, "loss": 0.7983, "step": 3921 }, { "epoch": 0.25, "grad_norm": 1.6450653079525916, "learning_rate": 8.772693943223529e-06, "loss": 0.7052, "step": 3922 }, { "epoch": 0.25, "grad_norm": 1.6021748426028817, "learning_rate": 8.772013616907584e-06, "loss": 0.6627, "step": 3923 }, { "epoch": 0.25, "grad_norm": 1.7379322708996539, "learning_rate": 8.771333128478419e-06, "loss": 0.7262, "step": 3924 }, { "epoch": 0.25, "grad_norm": 0.9469993916129242, "learning_rate": 8.77065247796528e-06, "loss": 0.5264, "step": 3925 }, { "epoch": 0.25, "grad_norm": 1.5726032433780774, "learning_rate": 8.769971665397423e-06, "loss": 0.7993, "step": 3926 }, { "epoch": 0.25, "grad_norm": 1.3270301146305798, "learning_rate": 8.769290690804104e-06, "loss": 0.6979, "step": 3927 }, { "epoch": 0.25, "grad_norm": 1.7449654883469754, "learning_rate": 8.768609554214591e-06, "loss": 0.778, "step": 3928 }, { "epoch": 0.25, "grad_norm": 1.499012208993487, "learning_rate": 8.767928255658158e-06, "loss": 0.6671, "step": 3929 }, { "epoch": 0.25, "grad_norm": 1.0772475637901155, "learning_rate": 8.767246795164089e-06, "loss": 0.6335, "step": 3930 }, { "epoch": 0.25, "grad_norm": 1.583814238243376, "learning_rate": 8.766565172761666e-06, "loss": 0.6896, "step": 3931 }, { "epoch": 0.25, "grad_norm": 1.6273145079856963, "learning_rate": 8.765883388480188e-06, "loss": 0.6928, "step": 3932 }, { "epoch": 0.25, "grad_norm": 1.5981124520899248, "learning_rate": 8.765201442348953e-06, "loss": 0.662, "step": 3933 }, { "epoch": 0.25, "grad_norm": 1.6520447036850607, "learning_rate": 8.764519334397275e-06, "loss": 0.7027, "step": 3934 }, { "epoch": 0.25, "grad_norm": 0.9938317670802523, "learning_rate": 8.763837064654464e-06, "loss": 0.5918, "step": 3935 }, { "epoch": 0.25, "grad_norm": 1.5165799766558075, "learning_rate": 8.763154633149846e-06, "loss": 0.6936, "step": 3936 }, { "epoch": 0.25, "grad_norm": 1.6484890628306903, "learning_rate": 8.762472039912748e-06, "loss": 0.778, "step": 3937 }, { "epoch": 0.25, "grad_norm": 1.6534942842434388, "learning_rate": 8.761789284972508e-06, "loss": 0.7436, "step": 3938 }, { "epoch": 0.25, "grad_norm": 1.5433197291480252, "learning_rate": 8.76110636835847e-06, "loss": 0.7055, "step": 3939 }, { "epoch": 0.25, "grad_norm": 1.453063862587274, "learning_rate": 8.760423290099983e-06, "loss": 0.68, "step": 3940 }, { "epoch": 0.25, "grad_norm": 1.8120269883597917, "learning_rate": 8.759740050226406e-06, "loss": 0.7305, "step": 3941 }, { "epoch": 0.25, "grad_norm": 1.4665822742711914, "learning_rate": 8.7590566487671e-06, "loss": 0.7924, "step": 3942 }, { "epoch": 0.25, "grad_norm": 1.4121997498434287, "learning_rate": 8.758373085751439e-06, "loss": 0.7082, "step": 3943 }, { "epoch": 0.25, "grad_norm": 1.5215369321072663, "learning_rate": 8.7576893612088e-06, "loss": 0.6797, "step": 3944 }, { "epoch": 0.25, "grad_norm": 1.1509664721056014, "learning_rate": 8.75700547516857e-06, "loss": 0.6417, "step": 3945 }, { "epoch": 0.25, "grad_norm": 1.5643804819985283, "learning_rate": 8.756321427660137e-06, "loss": 0.7737, "step": 3946 }, { "epoch": 0.25, "grad_norm": 1.139490674078652, "learning_rate": 8.755637218712902e-06, "loss": 0.7409, "step": 3947 }, { "epoch": 0.25, "grad_norm": 1.7085795563793014, "learning_rate": 8.754952848356272e-06, "loss": 0.7055, "step": 3948 }, { "epoch": 0.25, "grad_norm": 1.397335034545907, "learning_rate": 8.75426831661966e-06, "loss": 0.7998, "step": 3949 }, { "epoch": 0.25, "grad_norm": 1.6203535764323533, "learning_rate": 8.753583623532483e-06, "loss": 0.656, "step": 3950 }, { "epoch": 0.25, "grad_norm": 1.5042097831720103, "learning_rate": 8.75289876912417e-06, "loss": 0.6906, "step": 3951 }, { "epoch": 0.25, "grad_norm": 1.594317856724062, "learning_rate": 8.752213753424153e-06, "loss": 0.7954, "step": 3952 }, { "epoch": 0.25, "grad_norm": 2.101300053790781, "learning_rate": 8.751528576461873e-06, "loss": 0.6806, "step": 3953 }, { "epoch": 0.25, "grad_norm": 1.9613802788977803, "learning_rate": 8.75084323826678e-06, "loss": 0.7245, "step": 3954 }, { "epoch": 0.25, "grad_norm": 1.715364525640845, "learning_rate": 8.750157738868323e-06, "loss": 0.7466, "step": 3955 }, { "epoch": 0.25, "grad_norm": 1.4408436734073615, "learning_rate": 8.749472078295968e-06, "loss": 0.8068, "step": 3956 }, { "epoch": 0.25, "grad_norm": 1.6051619170637383, "learning_rate": 8.748786256579182e-06, "loss": 0.7354, "step": 3957 }, { "epoch": 0.25, "grad_norm": 1.398134973613185, "learning_rate": 8.748100273747442e-06, "loss": 0.683, "step": 3958 }, { "epoch": 0.25, "grad_norm": 1.5293604908487677, "learning_rate": 8.747414129830225e-06, "loss": 0.6983, "step": 3959 }, { "epoch": 0.25, "grad_norm": 1.6362563540675559, "learning_rate": 8.746727824857024e-06, "loss": 0.6885, "step": 3960 }, { "epoch": 0.25, "grad_norm": 1.7146606799090318, "learning_rate": 8.746041358857334e-06, "loss": 0.6947, "step": 3961 }, { "epoch": 0.25, "grad_norm": 1.6404531873627641, "learning_rate": 8.74535473186066e-06, "loss": 0.699, "step": 3962 }, { "epoch": 0.25, "grad_norm": 1.5315842893581386, "learning_rate": 8.744667943896507e-06, "loss": 0.7099, "step": 3963 }, { "epoch": 0.25, "grad_norm": 1.8516471470893137, "learning_rate": 8.743980994994394e-06, "loss": 0.7569, "step": 3964 }, { "epoch": 0.25, "grad_norm": 1.0603334351725267, "learning_rate": 8.743293885183847e-06, "loss": 0.7227, "step": 3965 }, { "epoch": 0.25, "grad_norm": 1.5562800145612175, "learning_rate": 8.742606614494395e-06, "loss": 0.7126, "step": 3966 }, { "epoch": 0.25, "grad_norm": 1.5259260748941237, "learning_rate": 8.741919182955573e-06, "loss": 0.7417, "step": 3967 }, { "epoch": 0.25, "grad_norm": 1.674857253588037, "learning_rate": 8.741231590596928e-06, "loss": 0.8068, "step": 3968 }, { "epoch": 0.25, "grad_norm": 1.544333495897531, "learning_rate": 8.740543837448012e-06, "loss": 0.8072, "step": 3969 }, { "epoch": 0.25, "grad_norm": 1.5507979808178338, "learning_rate": 8.73985592353838e-06, "loss": 0.7309, "step": 3970 }, { "epoch": 0.25, "grad_norm": 1.4813819711889722, "learning_rate": 8.739167848897601e-06, "loss": 0.7057, "step": 3971 }, { "epoch": 0.25, "grad_norm": 1.5781130538911927, "learning_rate": 8.738479613555243e-06, "loss": 0.6609, "step": 3972 }, { "epoch": 0.25, "grad_norm": 2.0061452493103804, "learning_rate": 8.737791217540887e-06, "loss": 0.7168, "step": 3973 }, { "epoch": 0.25, "grad_norm": 1.560297176594103, "learning_rate": 8.73710266088412e-06, "loss": 0.6735, "step": 3974 }, { "epoch": 0.25, "grad_norm": 2.928884913384052, "learning_rate": 8.736413943614533e-06, "loss": 0.7531, "step": 3975 }, { "epoch": 0.25, "grad_norm": 1.4034692856305393, "learning_rate": 8.735725065761724e-06, "loss": 0.6738, "step": 3976 }, { "epoch": 0.25, "grad_norm": 1.4400994571510781, "learning_rate": 8.735036027355304e-06, "loss": 0.7808, "step": 3977 }, { "epoch": 0.25, "grad_norm": 1.5320418131734996, "learning_rate": 8.734346828424881e-06, "loss": 0.6108, "step": 3978 }, { "epoch": 0.25, "grad_norm": 2.0783180356870785, "learning_rate": 8.733657469000081e-06, "loss": 0.7277, "step": 3979 }, { "epoch": 0.25, "grad_norm": 1.556086604713057, "learning_rate": 8.732967949110528e-06, "loss": 0.7153, "step": 3980 }, { "epoch": 0.25, "grad_norm": 1.7113964962818184, "learning_rate": 8.732278268785856e-06, "loss": 0.7407, "step": 3981 }, { "epoch": 0.25, "grad_norm": 1.6285764287729996, "learning_rate": 8.731588428055708e-06, "loss": 0.7578, "step": 3982 }, { "epoch": 0.25, "grad_norm": 1.623755692717083, "learning_rate": 8.730898426949728e-06, "loss": 0.6534, "step": 3983 }, { "epoch": 0.26, "grad_norm": 1.6555785401293437, "learning_rate": 8.730208265497575e-06, "loss": 0.7894, "step": 3984 }, { "epoch": 0.26, "grad_norm": 1.5868162448402525, "learning_rate": 8.729517943728909e-06, "loss": 0.7832, "step": 3985 }, { "epoch": 0.26, "grad_norm": 1.56635842629537, "learning_rate": 8.728827461673398e-06, "loss": 0.6946, "step": 3986 }, { "epoch": 0.26, "grad_norm": 1.426184200017523, "learning_rate": 8.728136819360717e-06, "loss": 0.7741, "step": 3987 }, { "epoch": 0.26, "grad_norm": 1.0540127673153976, "learning_rate": 8.727446016820553e-06, "loss": 0.7381, "step": 3988 }, { "epoch": 0.26, "grad_norm": 1.3433231317844125, "learning_rate": 8.726755054082589e-06, "loss": 0.7164, "step": 3989 }, { "epoch": 0.26, "grad_norm": 1.3979499770002228, "learning_rate": 8.726063931176522e-06, "loss": 0.6759, "step": 3990 }, { "epoch": 0.26, "grad_norm": 1.5164466932011524, "learning_rate": 8.72537264813206e-06, "loss": 0.7055, "step": 3991 }, { "epoch": 0.26, "grad_norm": 1.5845674977080815, "learning_rate": 8.724681204978908e-06, "loss": 0.6523, "step": 3992 }, { "epoch": 0.26, "grad_norm": 1.3777415838033582, "learning_rate": 8.723989601746785e-06, "loss": 0.6653, "step": 3993 }, { "epoch": 0.26, "grad_norm": 1.4717747659831826, "learning_rate": 8.723297838465414e-06, "loss": 0.6173, "step": 3994 }, { "epoch": 0.26, "grad_norm": 1.6701962762539095, "learning_rate": 8.722605915164526e-06, "loss": 0.6563, "step": 3995 }, { "epoch": 0.26, "grad_norm": 1.053613299162429, "learning_rate": 8.721913831873859e-06, "loss": 0.6725, "step": 3996 }, { "epoch": 0.26, "grad_norm": 1.8342802033767187, "learning_rate": 8.721221588623154e-06, "loss": 0.7635, "step": 3997 }, { "epoch": 0.26, "grad_norm": 1.526446338755528, "learning_rate": 8.720529185442167e-06, "loss": 0.6682, "step": 3998 }, { "epoch": 0.26, "grad_norm": 1.6513952185029874, "learning_rate": 8.71983662236065e-06, "loss": 0.7145, "step": 3999 }, { "epoch": 0.26, "grad_norm": 1.520215403489592, "learning_rate": 8.719143899408376e-06, "loss": 0.6763, "step": 4000 }, { "epoch": 0.26, "grad_norm": 1.500537894666634, "learning_rate": 8.718451016615108e-06, "loss": 0.8485, "step": 4001 }, { "epoch": 0.26, "grad_norm": 0.9521623170533023, "learning_rate": 8.71775797401063e-06, "loss": 0.6082, "step": 4002 }, { "epoch": 0.26, "grad_norm": 1.6847020930664276, "learning_rate": 8.717064771624728e-06, "loss": 0.7057, "step": 4003 }, { "epoch": 0.26, "grad_norm": 1.514697001785672, "learning_rate": 8.716371409487191e-06, "loss": 0.7098, "step": 4004 }, { "epoch": 0.26, "grad_norm": 1.4228848376811782, "learning_rate": 8.715677887627822e-06, "loss": 0.8034, "step": 4005 }, { "epoch": 0.26, "grad_norm": 1.4385346365016303, "learning_rate": 8.714984206076423e-06, "loss": 0.7431, "step": 4006 }, { "epoch": 0.26, "grad_norm": 1.3990936303950068, "learning_rate": 8.71429036486281e-06, "loss": 0.6565, "step": 4007 }, { "epoch": 0.26, "grad_norm": 1.4752593737521122, "learning_rate": 8.713596364016802e-06, "loss": 0.6971, "step": 4008 }, { "epoch": 0.26, "grad_norm": 1.2315646346123776, "learning_rate": 8.712902203568226e-06, "loss": 0.6472, "step": 4009 }, { "epoch": 0.26, "grad_norm": 1.5713860712430752, "learning_rate": 8.712207883546913e-06, "loss": 0.7811, "step": 4010 }, { "epoch": 0.26, "grad_norm": 1.4516676422755697, "learning_rate": 8.711513403982708e-06, "loss": 0.7199, "step": 4011 }, { "epoch": 0.26, "grad_norm": 1.6728227633408628, "learning_rate": 8.710818764905455e-06, "loss": 0.7201, "step": 4012 }, { "epoch": 0.26, "grad_norm": 1.716267498506481, "learning_rate": 8.710123966345008e-06, "loss": 0.7268, "step": 4013 }, { "epoch": 0.26, "grad_norm": 1.4068470931676071, "learning_rate": 8.70942900833123e-06, "loss": 0.7552, "step": 4014 }, { "epoch": 0.26, "grad_norm": 1.5955103072101944, "learning_rate": 8.708733890893987e-06, "loss": 0.7149, "step": 4015 }, { "epoch": 0.26, "grad_norm": 1.7068943477954712, "learning_rate": 8.708038614063156e-06, "loss": 0.7712, "step": 4016 }, { "epoch": 0.26, "grad_norm": 1.3634747743499143, "learning_rate": 8.707343177868616e-06, "loss": 0.6783, "step": 4017 }, { "epoch": 0.26, "grad_norm": 1.8572695475026568, "learning_rate": 8.706647582340258e-06, "loss": 0.7863, "step": 4018 }, { "epoch": 0.26, "grad_norm": 1.6455687327769666, "learning_rate": 8.705951827507974e-06, "loss": 0.7366, "step": 4019 }, { "epoch": 0.26, "grad_norm": 1.487849724650885, "learning_rate": 8.705255913401668e-06, "loss": 0.7789, "step": 4020 }, { "epoch": 0.26, "grad_norm": 1.3834170249136901, "learning_rate": 8.704559840051249e-06, "loss": 0.6909, "step": 4021 }, { "epoch": 0.26, "grad_norm": 1.76116372980691, "learning_rate": 8.703863607486631e-06, "loss": 0.7363, "step": 4022 }, { "epoch": 0.26, "grad_norm": 1.3683793510722062, "learning_rate": 8.70316721573774e-06, "loss": 0.6551, "step": 4023 }, { "epoch": 0.26, "grad_norm": 1.5182302357043354, "learning_rate": 8.702470664834503e-06, "loss": 0.6997, "step": 4024 }, { "epoch": 0.26, "grad_norm": 1.6242988936371483, "learning_rate": 8.701773954806856e-06, "loss": 0.7702, "step": 4025 }, { "epoch": 0.26, "grad_norm": 1.4292228888172687, "learning_rate": 8.701077085684744e-06, "loss": 0.6705, "step": 4026 }, { "epoch": 0.26, "grad_norm": 1.7548749870626101, "learning_rate": 8.700380057498115e-06, "loss": 0.733, "step": 4027 }, { "epoch": 0.26, "grad_norm": 1.498196210156328, "learning_rate": 8.699682870276927e-06, "loss": 0.7373, "step": 4028 }, { "epoch": 0.26, "grad_norm": 1.843242360566345, "learning_rate": 8.698985524051143e-06, "loss": 0.6861, "step": 4029 }, { "epoch": 0.26, "grad_norm": 1.189537122572757, "learning_rate": 8.698288018850735e-06, "loss": 0.5536, "step": 4030 }, { "epoch": 0.26, "grad_norm": 1.5356326281525265, "learning_rate": 8.697590354705679e-06, "loss": 0.7299, "step": 4031 }, { "epoch": 0.26, "grad_norm": 1.6081940709001514, "learning_rate": 8.696892531645958e-06, "loss": 0.7532, "step": 4032 }, { "epoch": 0.26, "grad_norm": 1.5369528005101585, "learning_rate": 8.696194549701564e-06, "loss": 0.6886, "step": 4033 }, { "epoch": 0.26, "grad_norm": 1.6311650972480738, "learning_rate": 8.695496408902496e-06, "loss": 0.7748, "step": 4034 }, { "epoch": 0.26, "grad_norm": 1.484137103849675, "learning_rate": 8.694798109278758e-06, "loss": 0.6891, "step": 4035 }, { "epoch": 0.26, "grad_norm": 1.6813807845306104, "learning_rate": 8.69409965086036e-06, "loss": 0.7892, "step": 4036 }, { "epoch": 0.26, "grad_norm": 2.1479017878608673, "learning_rate": 8.693401033677322e-06, "loss": 0.6439, "step": 4037 }, { "epoch": 0.26, "grad_norm": 1.610388803394183, "learning_rate": 8.692702257759669e-06, "loss": 0.577, "step": 4038 }, { "epoch": 0.26, "grad_norm": 1.4858137931631934, "learning_rate": 8.69200332313743e-06, "loss": 0.7619, "step": 4039 }, { "epoch": 0.26, "grad_norm": 1.5522611610141144, "learning_rate": 8.691304229840649e-06, "loss": 0.8429, "step": 4040 }, { "epoch": 0.26, "grad_norm": 1.5386308317284936, "learning_rate": 8.690604977899369e-06, "loss": 0.6553, "step": 4041 }, { "epoch": 0.26, "grad_norm": 1.4720534925051862, "learning_rate": 8.689905567343639e-06, "loss": 0.6735, "step": 4042 }, { "epoch": 0.26, "grad_norm": 1.7066528299093273, "learning_rate": 8.689205998203522e-06, "loss": 0.7446, "step": 4043 }, { "epoch": 0.26, "grad_norm": 1.4777210072067561, "learning_rate": 8.688506270509085e-06, "loss": 0.7525, "step": 4044 }, { "epoch": 0.26, "grad_norm": 1.7916528601684656, "learning_rate": 8.6878063842904e-06, "loss": 0.7186, "step": 4045 }, { "epoch": 0.26, "grad_norm": 1.3440824822676583, "learning_rate": 8.687106339577543e-06, "loss": 0.6576, "step": 4046 }, { "epoch": 0.26, "grad_norm": 1.606099305815083, "learning_rate": 8.686406136400604e-06, "loss": 0.8218, "step": 4047 }, { "epoch": 0.26, "grad_norm": 1.5354605226837306, "learning_rate": 8.685705774789677e-06, "loss": 0.7007, "step": 4048 }, { "epoch": 0.26, "grad_norm": 1.7979271844881035, "learning_rate": 8.685005254774859e-06, "loss": 0.7314, "step": 4049 }, { "epoch": 0.26, "grad_norm": 1.519415796056037, "learning_rate": 8.68430457638626e-06, "loss": 0.6458, "step": 4050 }, { "epoch": 0.26, "grad_norm": 1.7892491145937215, "learning_rate": 8.68360373965399e-06, "loss": 0.6799, "step": 4051 }, { "epoch": 0.26, "grad_norm": 1.5241488646293537, "learning_rate": 8.682902744608173e-06, "loss": 0.6585, "step": 4052 }, { "epoch": 0.26, "grad_norm": 1.644867397176286, "learning_rate": 8.682201591278934e-06, "loss": 0.6939, "step": 4053 }, { "epoch": 0.26, "grad_norm": 1.7581813471299346, "learning_rate": 8.681500279696408e-06, "loss": 0.6852, "step": 4054 }, { "epoch": 0.26, "grad_norm": 1.4911472551608822, "learning_rate": 8.680798809890737e-06, "loss": 0.7518, "step": 4055 }, { "epoch": 0.26, "grad_norm": 1.512568783098508, "learning_rate": 8.680097181892067e-06, "loss": 0.8315, "step": 4056 }, { "epoch": 0.26, "grad_norm": 0.961601755235757, "learning_rate": 8.679395395730552e-06, "loss": 0.6848, "step": 4057 }, { "epoch": 0.26, "grad_norm": 1.5588542970567265, "learning_rate": 8.678693451436355e-06, "loss": 0.7282, "step": 4058 }, { "epoch": 0.26, "grad_norm": 1.609129082837299, "learning_rate": 8.677991349039644e-06, "loss": 0.6749, "step": 4059 }, { "epoch": 0.26, "grad_norm": 1.7397371086566944, "learning_rate": 8.677289088570594e-06, "loss": 0.7594, "step": 4060 }, { "epoch": 0.26, "grad_norm": 1.5084815881088212, "learning_rate": 8.676586670059383e-06, "loss": 0.7007, "step": 4061 }, { "epoch": 0.26, "grad_norm": 1.746883023035026, "learning_rate": 8.675884093536206e-06, "loss": 0.7391, "step": 4062 }, { "epoch": 0.26, "grad_norm": 1.5413085089221945, "learning_rate": 8.675181359031253e-06, "loss": 0.7026, "step": 4063 }, { "epoch": 0.26, "grad_norm": 1.4768451995237677, "learning_rate": 8.674478466574727e-06, "loss": 0.7367, "step": 4064 }, { "epoch": 0.26, "grad_norm": 1.5787717639819023, "learning_rate": 8.673775416196838e-06, "loss": 0.6348, "step": 4065 }, { "epoch": 0.26, "grad_norm": 1.0748578046377117, "learning_rate": 8.673072207927805e-06, "loss": 0.6483, "step": 4066 }, { "epoch": 0.26, "grad_norm": 2.0759542187160136, "learning_rate": 8.672368841797842e-06, "loss": 0.8006, "step": 4067 }, { "epoch": 0.26, "grad_norm": 1.4729109544161814, "learning_rate": 8.671665317837185e-06, "loss": 0.6938, "step": 4068 }, { "epoch": 0.26, "grad_norm": 1.7370997745613554, "learning_rate": 8.670961636076067e-06, "loss": 0.7984, "step": 4069 }, { "epoch": 0.26, "grad_norm": 1.4385547869313131, "learning_rate": 8.670257796544732e-06, "loss": 0.6911, "step": 4070 }, { "epoch": 0.26, "grad_norm": 1.5378588077628137, "learning_rate": 8.669553799273429e-06, "loss": 0.631, "step": 4071 }, { "epoch": 0.26, "grad_norm": 1.202518133057413, "learning_rate": 8.668849644292416e-06, "loss": 0.7295, "step": 4072 }, { "epoch": 0.26, "grad_norm": 2.0011702414704815, "learning_rate": 8.668145331631953e-06, "loss": 0.7081, "step": 4073 }, { "epoch": 0.26, "grad_norm": 1.5817163563270182, "learning_rate": 8.667440861322312e-06, "loss": 0.7276, "step": 4074 }, { "epoch": 0.26, "grad_norm": 1.7855055948700913, "learning_rate": 8.666736233393769e-06, "loss": 0.5883, "step": 4075 }, { "epoch": 0.26, "grad_norm": 1.4084506000279744, "learning_rate": 8.666031447876607e-06, "loss": 0.6631, "step": 4076 }, { "epoch": 0.26, "grad_norm": 1.6899969327584379, "learning_rate": 8.665326504801117e-06, "loss": 0.7487, "step": 4077 }, { "epoch": 0.26, "grad_norm": 1.6853113940559012, "learning_rate": 8.664621404197598e-06, "loss": 0.7222, "step": 4078 }, { "epoch": 0.26, "grad_norm": 1.443834883445548, "learning_rate": 8.663916146096348e-06, "loss": 0.6471, "step": 4079 }, { "epoch": 0.26, "grad_norm": 1.6146615885127265, "learning_rate": 8.663210730527683e-06, "loss": 0.7351, "step": 4080 }, { "epoch": 0.26, "grad_norm": 2.2480360664210552, "learning_rate": 8.662505157521918e-06, "loss": 0.7678, "step": 4081 }, { "epoch": 0.26, "grad_norm": 1.8321993156329561, "learning_rate": 8.661799427109377e-06, "loss": 0.7342, "step": 4082 }, { "epoch": 0.26, "grad_norm": 1.6535843100331455, "learning_rate": 8.66109353932039e-06, "loss": 0.717, "step": 4083 }, { "epoch": 0.26, "grad_norm": 1.4989609900882193, "learning_rate": 8.660387494185298e-06, "loss": 0.7082, "step": 4084 }, { "epoch": 0.26, "grad_norm": 1.7143560307073082, "learning_rate": 8.659681291734441e-06, "loss": 0.8103, "step": 4085 }, { "epoch": 0.26, "grad_norm": 1.1775885732974647, "learning_rate": 8.658974931998174e-06, "loss": 0.6302, "step": 4086 }, { "epoch": 0.26, "grad_norm": 1.2480973891207785, "learning_rate": 8.658268415006853e-06, "loss": 0.635, "step": 4087 }, { "epoch": 0.26, "grad_norm": 1.4786692667310288, "learning_rate": 8.657561740790841e-06, "loss": 0.7309, "step": 4088 }, { "epoch": 0.26, "grad_norm": 1.7265615006192816, "learning_rate": 8.656854909380512e-06, "loss": 0.7496, "step": 4089 }, { "epoch": 0.26, "grad_norm": 1.3380136743023274, "learning_rate": 8.656147920806241e-06, "loss": 0.6431, "step": 4090 }, { "epoch": 0.26, "grad_norm": 1.687079493380988, "learning_rate": 8.655440775098418e-06, "loss": 0.7425, "step": 4091 }, { "epoch": 0.26, "grad_norm": 1.631889846310437, "learning_rate": 8.65473347228743e-06, "loss": 0.6678, "step": 4092 }, { "epoch": 0.26, "grad_norm": 1.655462240402851, "learning_rate": 8.654026012403678e-06, "loss": 0.7089, "step": 4093 }, { "epoch": 0.26, "grad_norm": 1.417329813312474, "learning_rate": 8.653318395477565e-06, "loss": 0.721, "step": 4094 }, { "epoch": 0.26, "grad_norm": 1.4909269582260907, "learning_rate": 8.652610621539505e-06, "loss": 0.8031, "step": 4095 }, { "epoch": 0.26, "grad_norm": 1.554683506081681, "learning_rate": 8.651902690619916e-06, "loss": 0.7739, "step": 4096 }, { "epoch": 0.26, "grad_norm": 1.9634823537610702, "learning_rate": 8.651194602749223e-06, "loss": 0.7429, "step": 4097 }, { "epoch": 0.26, "grad_norm": 1.486327886271871, "learning_rate": 8.650486357957856e-06, "loss": 0.6886, "step": 4098 }, { "epoch": 0.26, "grad_norm": 1.4392322041903418, "learning_rate": 8.649777956276257e-06, "loss": 0.7926, "step": 4099 }, { "epoch": 0.26, "grad_norm": 1.6994704998839467, "learning_rate": 8.649069397734873e-06, "loss": 0.7492, "step": 4100 }, { "epoch": 0.26, "grad_norm": 3.3954445374427293, "learning_rate": 8.648360682364153e-06, "loss": 0.6785, "step": 4101 }, { "epoch": 0.26, "grad_norm": 1.47087325403067, "learning_rate": 8.647651810194556e-06, "loss": 0.8116, "step": 4102 }, { "epoch": 0.26, "grad_norm": 1.7017421520936833, "learning_rate": 8.646942781256548e-06, "loss": 0.8188, "step": 4103 }, { "epoch": 0.26, "grad_norm": 1.6125963195035897, "learning_rate": 8.646233595580604e-06, "loss": 0.7159, "step": 4104 }, { "epoch": 0.26, "grad_norm": 1.5988003352224756, "learning_rate": 8.645524253197202e-06, "loss": 0.7383, "step": 4105 }, { "epoch": 0.26, "grad_norm": 1.8497223257800295, "learning_rate": 8.644814754136827e-06, "loss": 0.7833, "step": 4106 }, { "epoch": 0.26, "grad_norm": 2.603332048366836, "learning_rate": 8.644105098429975e-06, "loss": 0.8719, "step": 4107 }, { "epoch": 0.26, "grad_norm": 1.4257178365381984, "learning_rate": 8.64339528610714e-06, "loss": 0.7707, "step": 4108 }, { "epoch": 0.26, "grad_norm": 1.4011123738852513, "learning_rate": 8.642685317198833e-06, "loss": 0.7492, "step": 4109 }, { "epoch": 0.26, "grad_norm": 1.2359206122260344, "learning_rate": 8.641975191735567e-06, "loss": 0.7237, "step": 4110 }, { "epoch": 0.26, "grad_norm": 1.756251882455834, "learning_rate": 8.64126490974786e-06, "loss": 0.7993, "step": 4111 }, { "epoch": 0.26, "grad_norm": 1.5852254997075343, "learning_rate": 8.640554471266236e-06, "loss": 0.7174, "step": 4112 }, { "epoch": 0.26, "grad_norm": 0.9302934110761897, "learning_rate": 8.639843876321232e-06, "loss": 0.5377, "step": 4113 }, { "epoch": 0.26, "grad_norm": 1.8320486566341776, "learning_rate": 8.639133124943386e-06, "loss": 0.7608, "step": 4114 }, { "epoch": 0.26, "grad_norm": 1.4570275884090311, "learning_rate": 8.638422217163244e-06, "loss": 0.6711, "step": 4115 }, { "epoch": 0.26, "grad_norm": 1.1622378752560605, "learning_rate": 8.637711153011363e-06, "loss": 0.6589, "step": 4116 }, { "epoch": 0.26, "grad_norm": 1.1208056886864382, "learning_rate": 8.6369999325183e-06, "loss": 0.6272, "step": 4117 }, { "epoch": 0.26, "grad_norm": 1.620455305496047, "learning_rate": 8.63628855571462e-06, "loss": 0.7336, "step": 4118 }, { "epoch": 0.26, "grad_norm": 1.2557286854108503, "learning_rate": 8.6355770226309e-06, "loss": 0.7295, "step": 4119 }, { "epoch": 0.26, "grad_norm": 1.6387280245771303, "learning_rate": 8.634865333297722e-06, "loss": 0.7033, "step": 4120 }, { "epoch": 0.26, "grad_norm": 1.7267751727646106, "learning_rate": 8.634153487745667e-06, "loss": 0.7237, "step": 4121 }, { "epoch": 0.26, "grad_norm": 1.7492995339602295, "learning_rate": 8.633441486005331e-06, "loss": 0.6905, "step": 4122 }, { "epoch": 0.26, "grad_norm": 1.5493910234666757, "learning_rate": 8.632729328107317e-06, "loss": 0.7046, "step": 4123 }, { "epoch": 0.26, "grad_norm": 1.5331452132100332, "learning_rate": 8.632017014082227e-06, "loss": 0.7105, "step": 4124 }, { "epoch": 0.26, "grad_norm": 1.3874284970693231, "learning_rate": 8.63130454396068e-06, "loss": 0.6733, "step": 4125 }, { "epoch": 0.26, "grad_norm": 1.5593264508515987, "learning_rate": 8.630591917773294e-06, "loss": 0.6696, "step": 4126 }, { "epoch": 0.26, "grad_norm": 1.7595855202429658, "learning_rate": 8.629879135550695e-06, "loss": 0.7639, "step": 4127 }, { "epoch": 0.26, "grad_norm": 1.7642629655814894, "learning_rate": 8.62916619732352e-06, "loss": 0.7128, "step": 4128 }, { "epoch": 0.26, "grad_norm": 1.2304619994700137, "learning_rate": 8.628453103122404e-06, "loss": 0.7145, "step": 4129 }, { "epoch": 0.26, "grad_norm": 1.918245245763864, "learning_rate": 8.627739852978003e-06, "loss": 0.6503, "step": 4130 }, { "epoch": 0.26, "grad_norm": 1.6026288753556455, "learning_rate": 8.627026446920963e-06, "loss": 0.7616, "step": 4131 }, { "epoch": 0.26, "grad_norm": 2.042365187896033, "learning_rate": 8.62631288498195e-06, "loss": 0.7026, "step": 4132 }, { "epoch": 0.26, "grad_norm": 1.409558695862731, "learning_rate": 8.625599167191627e-06, "loss": 0.7377, "step": 4133 }, { "epoch": 0.26, "grad_norm": 1.6641165019092308, "learning_rate": 8.624885293580671e-06, "loss": 0.7774, "step": 4134 }, { "epoch": 0.26, "grad_norm": 1.5847992911299802, "learning_rate": 8.624171264179761e-06, "loss": 0.6729, "step": 4135 }, { "epoch": 0.26, "grad_norm": 1.482784375850879, "learning_rate": 8.623457079019585e-06, "loss": 0.6572, "step": 4136 }, { "epoch": 0.26, "grad_norm": 1.055294761702808, "learning_rate": 8.62274273813084e-06, "loss": 0.5235, "step": 4137 }, { "epoch": 0.26, "grad_norm": 1.0318727625944213, "learning_rate": 8.622028241544225e-06, "loss": 0.6331, "step": 4138 }, { "epoch": 0.26, "grad_norm": 1.2447798379331096, "learning_rate": 8.621313589290446e-06, "loss": 0.7207, "step": 4139 }, { "epoch": 0.26, "grad_norm": 1.4941752722911463, "learning_rate": 8.620598781400216e-06, "loss": 0.7586, "step": 4140 }, { "epoch": 0.27, "grad_norm": 1.0494281164566845, "learning_rate": 8.619883817904262e-06, "loss": 0.7092, "step": 4141 }, { "epoch": 0.27, "grad_norm": 1.7982048808820423, "learning_rate": 8.619168698833306e-06, "loss": 0.7486, "step": 4142 }, { "epoch": 0.27, "grad_norm": 1.6081118762155493, "learning_rate": 8.618453424218085e-06, "loss": 0.7346, "step": 4143 }, { "epoch": 0.27, "grad_norm": 1.4368759829859024, "learning_rate": 8.61773799408934e-06, "loss": 0.7788, "step": 4144 }, { "epoch": 0.27, "grad_norm": 1.6459496054497007, "learning_rate": 8.617022408477816e-06, "loss": 0.8499, "step": 4145 }, { "epoch": 0.27, "grad_norm": 1.5574540704179354, "learning_rate": 8.61630666741427e-06, "loss": 0.6939, "step": 4146 }, { "epoch": 0.27, "grad_norm": 1.7205486597314696, "learning_rate": 8.615590770929461e-06, "loss": 0.7148, "step": 4147 }, { "epoch": 0.27, "grad_norm": 1.577011309764895, "learning_rate": 8.61487471905416e-06, "loss": 0.7052, "step": 4148 }, { "epoch": 0.27, "grad_norm": 1.9451211705807536, "learning_rate": 8.614158511819138e-06, "loss": 0.7077, "step": 4149 }, { "epoch": 0.27, "grad_norm": 1.6090406655620042, "learning_rate": 8.613442149255179e-06, "loss": 0.7889, "step": 4150 }, { "epoch": 0.27, "grad_norm": 1.314280391251136, "learning_rate": 8.612725631393068e-06, "loss": 0.649, "step": 4151 }, { "epoch": 0.27, "grad_norm": 1.100036371697592, "learning_rate": 8.612008958263603e-06, "loss": 0.684, "step": 4152 }, { "epoch": 0.27, "grad_norm": 1.6974660980075522, "learning_rate": 8.611292129897581e-06, "loss": 0.696, "step": 4153 }, { "epoch": 0.27, "grad_norm": 1.7773677179710468, "learning_rate": 8.610575146325813e-06, "loss": 0.7085, "step": 4154 }, { "epoch": 0.27, "grad_norm": 1.1959400091556411, "learning_rate": 8.60985800757911e-06, "loss": 0.6873, "step": 4155 }, { "epoch": 0.27, "grad_norm": 1.6676017112270705, "learning_rate": 8.609140713688296e-06, "loss": 0.7593, "step": 4156 }, { "epoch": 0.27, "grad_norm": 1.5891654275777785, "learning_rate": 8.608423264684198e-06, "loss": 0.6804, "step": 4157 }, { "epoch": 0.27, "grad_norm": 2.993721308242713, "learning_rate": 8.607705660597652e-06, "loss": 0.665, "step": 4158 }, { "epoch": 0.27, "grad_norm": 1.624315924010597, "learning_rate": 8.606987901459497e-06, "loss": 0.6393, "step": 4159 }, { "epoch": 0.27, "grad_norm": 1.6475660847355365, "learning_rate": 8.60626998730058e-06, "loss": 0.6943, "step": 4160 }, { "epoch": 0.27, "grad_norm": 1.452958984974497, "learning_rate": 8.605551918151755e-06, "loss": 0.6557, "step": 4161 }, { "epoch": 0.27, "grad_norm": 2.047911440230365, "learning_rate": 8.604833694043889e-06, "loss": 0.8725, "step": 4162 }, { "epoch": 0.27, "grad_norm": 1.5108974519971143, "learning_rate": 8.604115315007844e-06, "loss": 0.6806, "step": 4163 }, { "epoch": 0.27, "grad_norm": 1.6138035872603314, "learning_rate": 8.603396781074495e-06, "loss": 0.7377, "step": 4164 }, { "epoch": 0.27, "grad_norm": 1.5821773562768435, "learning_rate": 8.602678092274725e-06, "loss": 0.6696, "step": 4165 }, { "epoch": 0.27, "grad_norm": 1.4581956171710664, "learning_rate": 8.601959248639421e-06, "loss": 0.6936, "step": 4166 }, { "epoch": 0.27, "grad_norm": 1.4892939083637833, "learning_rate": 8.601240250199476e-06, "loss": 0.6903, "step": 4167 }, { "epoch": 0.27, "grad_norm": 1.2060636787971315, "learning_rate": 8.600521096985795e-06, "loss": 0.6596, "step": 4168 }, { "epoch": 0.27, "grad_norm": 1.5895310080136376, "learning_rate": 8.599801789029281e-06, "loss": 0.7326, "step": 4169 }, { "epoch": 0.27, "grad_norm": 1.4872624367258445, "learning_rate": 8.59908232636085e-06, "loss": 0.6564, "step": 4170 }, { "epoch": 0.27, "grad_norm": 1.5932928516144833, "learning_rate": 8.598362709011425e-06, "loss": 0.7546, "step": 4171 }, { "epoch": 0.27, "grad_norm": 1.478300328558444, "learning_rate": 8.59764293701193e-06, "loss": 0.6274, "step": 4172 }, { "epoch": 0.27, "grad_norm": 1.4114575078250813, "learning_rate": 8.596923010393303e-06, "loss": 0.7675, "step": 4173 }, { "epoch": 0.27, "grad_norm": 1.596891423351978, "learning_rate": 8.596202929186483e-06, "loss": 0.7241, "step": 4174 }, { "epoch": 0.27, "grad_norm": 1.892649643905448, "learning_rate": 8.595482693422416e-06, "loss": 0.6303, "step": 4175 }, { "epoch": 0.27, "grad_norm": 1.4935567566712753, "learning_rate": 8.59476230313206e-06, "loss": 0.7505, "step": 4176 }, { "epoch": 0.27, "grad_norm": 1.6421705379152414, "learning_rate": 8.594041758346372e-06, "loss": 0.6923, "step": 4177 }, { "epoch": 0.27, "grad_norm": 2.7185726601860503, "learning_rate": 8.593321059096322e-06, "loss": 0.7104, "step": 4178 }, { "epoch": 0.27, "grad_norm": 1.672431516725592, "learning_rate": 8.592600205412884e-06, "loss": 0.7489, "step": 4179 }, { "epoch": 0.27, "grad_norm": 1.677843683542767, "learning_rate": 8.591879197327039e-06, "loss": 0.7524, "step": 4180 }, { "epoch": 0.27, "grad_norm": 1.4638526316945522, "learning_rate": 8.591158034869773e-06, "loss": 0.7112, "step": 4181 }, { "epoch": 0.27, "grad_norm": 1.5387951516032794, "learning_rate": 8.590436718072081e-06, "loss": 0.7639, "step": 4182 }, { "epoch": 0.27, "grad_norm": 1.7731662238944403, "learning_rate": 8.589715246964963e-06, "loss": 0.6499, "step": 4183 }, { "epoch": 0.27, "grad_norm": 1.48919894360054, "learning_rate": 8.588993621579427e-06, "loss": 0.7533, "step": 4184 }, { "epoch": 0.27, "grad_norm": 1.2716139033627594, "learning_rate": 8.588271841946485e-06, "loss": 0.681, "step": 4185 }, { "epoch": 0.27, "grad_norm": 1.6032188003167998, "learning_rate": 8.587549908097161e-06, "loss": 0.7209, "step": 4186 }, { "epoch": 0.27, "grad_norm": 1.533835752833546, "learning_rate": 8.58682782006248e-06, "loss": 0.7894, "step": 4187 }, { "epoch": 0.27, "grad_norm": 1.7314211459796665, "learning_rate": 8.586105577873476e-06, "loss": 0.6865, "step": 4188 }, { "epoch": 0.27, "grad_norm": 1.1512717551595693, "learning_rate": 8.585383181561191e-06, "loss": 0.711, "step": 4189 }, { "epoch": 0.27, "grad_norm": 1.5487428839839568, "learning_rate": 8.58466063115667e-06, "loss": 0.7423, "step": 4190 }, { "epoch": 0.27, "grad_norm": 1.7750006752972727, "learning_rate": 8.583937926690967e-06, "loss": 0.769, "step": 4191 }, { "epoch": 0.27, "grad_norm": 1.6049302882101453, "learning_rate": 8.583215068195141e-06, "loss": 0.7541, "step": 4192 }, { "epoch": 0.27, "grad_norm": 1.4322007921176867, "learning_rate": 8.582492055700264e-06, "loss": 0.6483, "step": 4193 }, { "epoch": 0.27, "grad_norm": 1.5754135833446188, "learning_rate": 8.581768889237405e-06, "loss": 0.6746, "step": 4194 }, { "epoch": 0.27, "grad_norm": 1.4912347061954532, "learning_rate": 8.581045568837647e-06, "loss": 0.695, "step": 4195 }, { "epoch": 0.27, "grad_norm": 1.4316766565506305, "learning_rate": 8.580322094532072e-06, "loss": 0.7803, "step": 4196 }, { "epoch": 0.27, "grad_norm": 1.1116431144702248, "learning_rate": 8.57959846635178e-06, "loss": 0.6532, "step": 4197 }, { "epoch": 0.27, "grad_norm": 1.4858901460059797, "learning_rate": 8.578874684327866e-06, "loss": 0.8041, "step": 4198 }, { "epoch": 0.27, "grad_norm": 1.5720064394454227, "learning_rate": 8.578150748491438e-06, "loss": 0.7875, "step": 4199 }, { "epoch": 0.27, "grad_norm": 1.6126062615399301, "learning_rate": 8.57742665887361e-06, "loss": 0.7377, "step": 4200 }, { "epoch": 0.27, "grad_norm": 1.7532844568930355, "learning_rate": 8.576702415505501e-06, "loss": 0.6405, "step": 4201 }, { "epoch": 0.27, "grad_norm": 2.518608945508104, "learning_rate": 8.575978018418239e-06, "loss": 0.7058, "step": 4202 }, { "epoch": 0.27, "grad_norm": 1.7531194256144378, "learning_rate": 8.575253467642954e-06, "loss": 0.7739, "step": 4203 }, { "epoch": 0.27, "grad_norm": 1.5965505472428163, "learning_rate": 8.57452876321079e-06, "loss": 0.6842, "step": 4204 }, { "epoch": 0.27, "grad_norm": 1.5557396166421171, "learning_rate": 8.57380390515289e-06, "loss": 0.7537, "step": 4205 }, { "epoch": 0.27, "grad_norm": 1.6379034702318445, "learning_rate": 8.573078893500406e-06, "loss": 0.7765, "step": 4206 }, { "epoch": 0.27, "grad_norm": 1.4650826892750346, "learning_rate": 8.5723537282845e-06, "loss": 0.6782, "step": 4207 }, { "epoch": 0.27, "grad_norm": 1.488861513519587, "learning_rate": 8.57162840953634e-06, "loss": 0.6486, "step": 4208 }, { "epoch": 0.27, "grad_norm": 1.4131776907378606, "learning_rate": 8.570902937287093e-06, "loss": 0.6897, "step": 4209 }, { "epoch": 0.27, "grad_norm": 1.5568896881258303, "learning_rate": 8.570177311567942e-06, "loss": 0.7856, "step": 4210 }, { "epoch": 0.27, "grad_norm": 1.4892653180773718, "learning_rate": 8.569451532410073e-06, "loss": 0.6605, "step": 4211 }, { "epoch": 0.27, "grad_norm": 1.5458569138867067, "learning_rate": 8.568725599844679e-06, "loss": 0.7175, "step": 4212 }, { "epoch": 0.27, "grad_norm": 1.557643954638193, "learning_rate": 8.567999513902953e-06, "loss": 0.6923, "step": 4213 }, { "epoch": 0.27, "grad_norm": 1.5292135523355639, "learning_rate": 8.56727327461611e-06, "loss": 0.6839, "step": 4214 }, { "epoch": 0.27, "grad_norm": 1.143652067047236, "learning_rate": 8.566546882015355e-06, "loss": 0.7376, "step": 4215 }, { "epoch": 0.27, "grad_norm": 1.144853187670242, "learning_rate": 8.56582033613191e-06, "loss": 0.605, "step": 4216 }, { "epoch": 0.27, "grad_norm": 1.4303277350134085, "learning_rate": 8.565093636996999e-06, "loss": 0.76, "step": 4217 }, { "epoch": 0.27, "grad_norm": 1.6365391669107843, "learning_rate": 8.564366784641855e-06, "loss": 0.7934, "step": 4218 }, { "epoch": 0.27, "grad_norm": 1.5278929085285806, "learning_rate": 8.563639779097717e-06, "loss": 0.6567, "step": 4219 }, { "epoch": 0.27, "grad_norm": 1.7761760178916184, "learning_rate": 8.56291262039583e-06, "loss": 0.707, "step": 4220 }, { "epoch": 0.27, "grad_norm": 1.5518268930234433, "learning_rate": 8.562185308567443e-06, "loss": 0.6874, "step": 4221 }, { "epoch": 0.27, "grad_norm": 1.6871608232363628, "learning_rate": 8.56145784364382e-06, "loss": 0.7498, "step": 4222 }, { "epoch": 0.27, "grad_norm": 1.352925475846104, "learning_rate": 8.56073022565622e-06, "loss": 0.7774, "step": 4223 }, { "epoch": 0.27, "grad_norm": 1.5572810850303604, "learning_rate": 8.560002454635917e-06, "loss": 0.7052, "step": 4224 }, { "epoch": 0.27, "grad_norm": 1.7579250791115955, "learning_rate": 8.55927453061419e-06, "loss": 0.7323, "step": 4225 }, { "epoch": 0.27, "grad_norm": 1.6862055697886618, "learning_rate": 8.558546453622322e-06, "loss": 0.7065, "step": 4226 }, { "epoch": 0.27, "grad_norm": 1.3169392462056728, "learning_rate": 8.557818223691607e-06, "loss": 0.6851, "step": 4227 }, { "epoch": 0.27, "grad_norm": 1.1868120606037684, "learning_rate": 8.557089840853338e-06, "loss": 0.7204, "step": 4228 }, { "epoch": 0.27, "grad_norm": 1.761480342406661, "learning_rate": 8.556361305138825e-06, "loss": 0.7333, "step": 4229 }, { "epoch": 0.27, "grad_norm": 1.5750292126612309, "learning_rate": 8.555632616579374e-06, "loss": 0.6769, "step": 4230 }, { "epoch": 0.27, "grad_norm": 1.5560123705775495, "learning_rate": 8.554903775206305e-06, "loss": 0.7028, "step": 4231 }, { "epoch": 0.27, "grad_norm": 1.7277587265776368, "learning_rate": 8.554174781050941e-06, "loss": 0.7151, "step": 4232 }, { "epoch": 0.27, "grad_norm": 1.8062838923028441, "learning_rate": 8.553445634144614e-06, "loss": 0.8513, "step": 4233 }, { "epoch": 0.27, "grad_norm": 1.7346228682001001, "learning_rate": 8.55271633451866e-06, "loss": 0.761, "step": 4234 }, { "epoch": 0.27, "grad_norm": 1.417569367855429, "learning_rate": 8.551986882204424e-06, "loss": 0.6799, "step": 4235 }, { "epoch": 0.27, "grad_norm": 1.6082323559458698, "learning_rate": 8.551257277233256e-06, "loss": 0.7213, "step": 4236 }, { "epoch": 0.27, "grad_norm": 1.4562234481862675, "learning_rate": 8.550527519636511e-06, "loss": 0.6898, "step": 4237 }, { "epoch": 0.27, "grad_norm": 1.4042625004560871, "learning_rate": 8.549797609445555e-06, "loss": 0.7542, "step": 4238 }, { "epoch": 0.27, "grad_norm": 1.603000608977496, "learning_rate": 8.549067546691756e-06, "loss": 0.6749, "step": 4239 }, { "epoch": 0.27, "grad_norm": 1.5760933054924526, "learning_rate": 8.548337331406491e-06, "loss": 0.7603, "step": 4240 }, { "epoch": 0.27, "grad_norm": 1.8147624854649131, "learning_rate": 8.547606963621146e-06, "loss": 0.6401, "step": 4241 }, { "epoch": 0.27, "grad_norm": 1.5482941111132802, "learning_rate": 8.546876443367105e-06, "loss": 0.6586, "step": 4242 }, { "epoch": 0.27, "grad_norm": 1.68819533164452, "learning_rate": 8.54614577067577e-06, "loss": 0.6949, "step": 4243 }, { "epoch": 0.27, "grad_norm": 1.6139396421199894, "learning_rate": 8.54541494557854e-06, "loss": 0.7512, "step": 4244 }, { "epoch": 0.27, "grad_norm": 1.477832240246115, "learning_rate": 8.544683968106827e-06, "loss": 0.7096, "step": 4245 }, { "epoch": 0.27, "grad_norm": 1.7024633492340926, "learning_rate": 8.543952838292043e-06, "loss": 0.7485, "step": 4246 }, { "epoch": 0.27, "grad_norm": 1.45852410277787, "learning_rate": 8.543221556165615e-06, "loss": 0.6993, "step": 4247 }, { "epoch": 0.27, "grad_norm": 1.7529086710436117, "learning_rate": 8.54249012175897e-06, "loss": 0.7083, "step": 4248 }, { "epoch": 0.27, "grad_norm": 1.520153006859418, "learning_rate": 8.541758535103542e-06, "loss": 0.7363, "step": 4249 }, { "epoch": 0.27, "grad_norm": 1.49238634292935, "learning_rate": 8.541026796230775e-06, "loss": 0.655, "step": 4250 }, { "epoch": 0.27, "grad_norm": 1.4116873097674505, "learning_rate": 8.540294905172117e-06, "loss": 0.6605, "step": 4251 }, { "epoch": 0.27, "grad_norm": 1.637236842700205, "learning_rate": 8.539562861959023e-06, "loss": 0.6804, "step": 4252 }, { "epoch": 0.27, "grad_norm": 1.4738243105740276, "learning_rate": 8.538830666622955e-06, "loss": 0.6932, "step": 4253 }, { "epoch": 0.27, "grad_norm": 1.447707936750391, "learning_rate": 8.53809831919538e-06, "loss": 0.72, "step": 4254 }, { "epoch": 0.27, "grad_norm": 1.5937223461717869, "learning_rate": 8.537365819707776e-06, "loss": 0.6569, "step": 4255 }, { "epoch": 0.27, "grad_norm": 1.5987389012622382, "learning_rate": 8.53663316819162e-06, "loss": 0.7293, "step": 4256 }, { "epoch": 0.27, "grad_norm": 1.648044453969651, "learning_rate": 8.535900364678403e-06, "loss": 0.6864, "step": 4257 }, { "epoch": 0.27, "grad_norm": 1.5314980229140616, "learning_rate": 8.535167409199618e-06, "loss": 0.6521, "step": 4258 }, { "epoch": 0.27, "grad_norm": 2.266330989037921, "learning_rate": 8.534434301786767e-06, "loss": 0.7682, "step": 4259 }, { "epoch": 0.27, "grad_norm": 1.8214576357194645, "learning_rate": 8.533701042471356e-06, "loss": 0.7816, "step": 4260 }, { "epoch": 0.27, "grad_norm": 1.4231257940272384, "learning_rate": 8.532967631284898e-06, "loss": 0.68, "step": 4261 }, { "epoch": 0.27, "grad_norm": 1.4625959213043251, "learning_rate": 8.532234068258918e-06, "loss": 0.6661, "step": 4262 }, { "epoch": 0.27, "grad_norm": 1.6183324566827513, "learning_rate": 8.531500353424937e-06, "loss": 0.7008, "step": 4263 }, { "epoch": 0.27, "grad_norm": 1.647428650544757, "learning_rate": 8.530766486814495e-06, "loss": 0.7578, "step": 4264 }, { "epoch": 0.27, "grad_norm": 1.650330847586034, "learning_rate": 8.530032468459126e-06, "loss": 0.7062, "step": 4265 }, { "epoch": 0.27, "grad_norm": 1.6032310249598933, "learning_rate": 8.529298298390379e-06, "loss": 0.7885, "step": 4266 }, { "epoch": 0.27, "grad_norm": 1.2485104873623847, "learning_rate": 8.528563976639807e-06, "loss": 0.6017, "step": 4267 }, { "epoch": 0.27, "grad_norm": 1.6439180002987286, "learning_rate": 8.527829503238972e-06, "loss": 0.7696, "step": 4268 }, { "epoch": 0.27, "grad_norm": 1.6244603702233227, "learning_rate": 8.527094878219435e-06, "loss": 0.606, "step": 4269 }, { "epoch": 0.27, "grad_norm": 1.5618832312033109, "learning_rate": 8.526360101612774e-06, "loss": 0.7259, "step": 4270 }, { "epoch": 0.27, "grad_norm": 1.5907335142951318, "learning_rate": 8.525625173450564e-06, "loss": 0.7996, "step": 4271 }, { "epoch": 0.27, "grad_norm": 1.6345772441991429, "learning_rate": 8.524890093764395e-06, "loss": 0.7401, "step": 4272 }, { "epoch": 0.27, "grad_norm": 1.4363078679939987, "learning_rate": 8.524154862585854e-06, "loss": 0.7264, "step": 4273 }, { "epoch": 0.27, "grad_norm": 1.753972194685993, "learning_rate": 8.523419479946545e-06, "loss": 0.6214, "step": 4274 }, { "epoch": 0.27, "grad_norm": 2.438387998884761, "learning_rate": 8.522683945878068e-06, "loss": 0.7493, "step": 4275 }, { "epoch": 0.27, "grad_norm": 1.6081120477833446, "learning_rate": 8.521948260412038e-06, "loss": 0.625, "step": 4276 }, { "epoch": 0.27, "grad_norm": 1.490287580240692, "learning_rate": 8.52121242358007e-06, "loss": 0.7662, "step": 4277 }, { "epoch": 0.27, "grad_norm": 1.5798862499311184, "learning_rate": 8.520476435413794e-06, "loss": 0.6786, "step": 4278 }, { "epoch": 0.27, "grad_norm": 1.8111157130860323, "learning_rate": 8.519740295944838e-06, "loss": 0.8128, "step": 4279 }, { "epoch": 0.27, "grad_norm": 1.6331902385905737, "learning_rate": 8.51900400520484e-06, "loss": 0.6576, "step": 4280 }, { "epoch": 0.27, "grad_norm": 1.5480223311862398, "learning_rate": 8.518267563225443e-06, "loss": 0.6814, "step": 4281 }, { "epoch": 0.27, "grad_norm": 1.588088714959429, "learning_rate": 8.5175309700383e-06, "loss": 0.696, "step": 4282 }, { "epoch": 0.27, "grad_norm": 1.602421081605116, "learning_rate": 8.51679422567507e-06, "loss": 0.6961, "step": 4283 }, { "epoch": 0.27, "grad_norm": 1.4960102168778444, "learning_rate": 8.51605733016741e-06, "loss": 0.7576, "step": 4284 }, { "epoch": 0.27, "grad_norm": 1.2484124528128206, "learning_rate": 8.515320283546996e-06, "loss": 0.653, "step": 4285 }, { "epoch": 0.27, "grad_norm": 1.1218915109154988, "learning_rate": 8.514583085845502e-06, "loss": 0.6383, "step": 4286 }, { "epoch": 0.27, "grad_norm": 1.5998129701906612, "learning_rate": 8.513845737094613e-06, "loss": 0.7296, "step": 4287 }, { "epoch": 0.27, "grad_norm": 1.2052396237041991, "learning_rate": 8.513108237326016e-06, "loss": 0.6151, "step": 4288 }, { "epoch": 0.27, "grad_norm": 1.5875308082195827, "learning_rate": 8.51237058657141e-06, "loss": 0.7466, "step": 4289 }, { "epoch": 0.27, "grad_norm": 1.6898766867156318, "learning_rate": 8.511632784862498e-06, "loss": 0.6718, "step": 4290 }, { "epoch": 0.27, "grad_norm": 1.4785232348975335, "learning_rate": 8.510894832230988e-06, "loss": 0.7113, "step": 4291 }, { "epoch": 0.27, "grad_norm": 2.0155614455765507, "learning_rate": 8.510156728708594e-06, "loss": 0.7546, "step": 4292 }, { "epoch": 0.27, "grad_norm": 1.7642548804992932, "learning_rate": 8.50941847432704e-06, "loss": 0.708, "step": 4293 }, { "epoch": 0.27, "grad_norm": 1.1162990304182983, "learning_rate": 8.508680069118055e-06, "loss": 0.6755, "step": 4294 }, { "epoch": 0.27, "grad_norm": 1.790065348098756, "learning_rate": 8.507941513113372e-06, "loss": 0.7666, "step": 4295 }, { "epoch": 0.27, "grad_norm": 1.5249007901199976, "learning_rate": 8.507202806344735e-06, "loss": 0.7784, "step": 4296 }, { "epoch": 0.28, "grad_norm": 1.6217270916052333, "learning_rate": 8.506463948843891e-06, "loss": 0.7125, "step": 4297 }, { "epoch": 0.28, "grad_norm": 1.8089907813448178, "learning_rate": 8.505724940642595e-06, "loss": 0.7063, "step": 4298 }, { "epoch": 0.28, "grad_norm": 1.567954178085098, "learning_rate": 8.504985781772606e-06, "loss": 0.749, "step": 4299 }, { "epoch": 0.28, "grad_norm": 1.5329159487086905, "learning_rate": 8.504246472265693e-06, "loss": 0.7833, "step": 4300 }, { "epoch": 0.28, "grad_norm": 1.6107205665829192, "learning_rate": 8.503507012153632e-06, "loss": 0.8636, "step": 4301 }, { "epoch": 0.28, "grad_norm": 1.5423149833348102, "learning_rate": 8.502767401468202e-06, "loss": 0.7039, "step": 4302 }, { "epoch": 0.28, "grad_norm": 1.5419816992434296, "learning_rate": 8.502027640241188e-06, "loss": 0.7099, "step": 4303 }, { "epoch": 0.28, "grad_norm": 1.45345865523929, "learning_rate": 8.501287728504383e-06, "loss": 0.7086, "step": 4304 }, { "epoch": 0.28, "grad_norm": 1.4263561207924402, "learning_rate": 8.500547666289592e-06, "loss": 0.7421, "step": 4305 }, { "epoch": 0.28, "grad_norm": 1.6037259184205677, "learning_rate": 8.499807453628616e-06, "loss": 0.7439, "step": 4306 }, { "epoch": 0.28, "grad_norm": 1.164247654610801, "learning_rate": 8.49906709055327e-06, "loss": 0.5911, "step": 4307 }, { "epoch": 0.28, "grad_norm": 1.1124207113768247, "learning_rate": 8.498326577095372e-06, "loss": 0.6642, "step": 4308 }, { "epoch": 0.28, "grad_norm": 1.5906304136049185, "learning_rate": 8.497585913286752e-06, "loss": 0.6979, "step": 4309 }, { "epoch": 0.28, "grad_norm": 1.4749171816966125, "learning_rate": 8.496845099159236e-06, "loss": 0.7133, "step": 4310 }, { "epoch": 0.28, "grad_norm": 1.666015141091116, "learning_rate": 8.496104134744667e-06, "loss": 0.7655, "step": 4311 }, { "epoch": 0.28, "grad_norm": 1.661175062613693, "learning_rate": 8.495363020074886e-06, "loss": 0.6598, "step": 4312 }, { "epoch": 0.28, "grad_norm": 1.8307092964939446, "learning_rate": 8.49462175518175e-06, "loss": 0.6627, "step": 4313 }, { "epoch": 0.28, "grad_norm": 1.7570946071349292, "learning_rate": 8.493880340097114e-06, "loss": 0.7271, "step": 4314 }, { "epoch": 0.28, "grad_norm": 1.582591062614835, "learning_rate": 8.493138774852842e-06, "loss": 0.7592, "step": 4315 }, { "epoch": 0.28, "grad_norm": 1.6417192535179197, "learning_rate": 8.492397059480805e-06, "loss": 0.7047, "step": 4316 }, { "epoch": 0.28, "grad_norm": 1.8161204800151312, "learning_rate": 8.49165519401288e-06, "loss": 0.6528, "step": 4317 }, { "epoch": 0.28, "grad_norm": 1.725326968006233, "learning_rate": 8.490913178480954e-06, "loss": 0.6753, "step": 4318 }, { "epoch": 0.28, "grad_norm": 1.108842406543106, "learning_rate": 8.490171012916915e-06, "loss": 0.6134, "step": 4319 }, { "epoch": 0.28, "grad_norm": 1.5821777287098144, "learning_rate": 8.489428697352658e-06, "loss": 0.6479, "step": 4320 }, { "epoch": 0.28, "grad_norm": 1.1788665592258087, "learning_rate": 8.488686231820089e-06, "loss": 0.6827, "step": 4321 }, { "epoch": 0.28, "grad_norm": 1.57715103599309, "learning_rate": 8.487943616351118e-06, "loss": 0.6577, "step": 4322 }, { "epoch": 0.28, "grad_norm": 1.6504937292846442, "learning_rate": 8.487200850977657e-06, "loss": 0.7646, "step": 4323 }, { "epoch": 0.28, "grad_norm": 2.2179201511789572, "learning_rate": 8.486457935731632e-06, "loss": 0.773, "step": 4324 }, { "epoch": 0.28, "grad_norm": 1.5060006573964624, "learning_rate": 8.48571487064497e-06, "loss": 0.6553, "step": 4325 }, { "epoch": 0.28, "grad_norm": 4.762997434211764, "learning_rate": 8.484971655749607e-06, "loss": 0.6396, "step": 4326 }, { "epoch": 0.28, "grad_norm": 1.6444830019942105, "learning_rate": 8.484228291077488e-06, "loss": 0.7103, "step": 4327 }, { "epoch": 0.28, "grad_norm": 1.6637682045183477, "learning_rate": 8.483484776660556e-06, "loss": 0.6936, "step": 4328 }, { "epoch": 0.28, "grad_norm": 1.5515777069997918, "learning_rate": 8.48274111253077e-06, "loss": 0.7977, "step": 4329 }, { "epoch": 0.28, "grad_norm": 2.0557657814523917, "learning_rate": 8.481997298720089e-06, "loss": 0.7348, "step": 4330 }, { "epoch": 0.28, "grad_norm": 4.1052485679309125, "learning_rate": 8.481253335260478e-06, "loss": 0.6892, "step": 4331 }, { "epoch": 0.28, "grad_norm": 1.4771030853355052, "learning_rate": 8.480509222183916e-06, "loss": 0.7699, "step": 4332 }, { "epoch": 0.28, "grad_norm": 1.0841093860266622, "learning_rate": 8.479764959522381e-06, "loss": 0.6759, "step": 4333 }, { "epoch": 0.28, "grad_norm": 1.4269661025869103, "learning_rate": 8.47902054730786e-06, "loss": 0.7416, "step": 4334 }, { "epoch": 0.28, "grad_norm": 1.407344167347918, "learning_rate": 8.478275985572346e-06, "loss": 0.6863, "step": 4335 }, { "epoch": 0.28, "grad_norm": 3.0578638343885647, "learning_rate": 8.477531274347839e-06, "loss": 0.7344, "step": 4336 }, { "epoch": 0.28, "grad_norm": 1.2970720202383759, "learning_rate": 8.476786413666346e-06, "loss": 0.7094, "step": 4337 }, { "epoch": 0.28, "grad_norm": 1.5132455368635278, "learning_rate": 8.476041403559878e-06, "loss": 0.7397, "step": 4338 }, { "epoch": 0.28, "grad_norm": 1.4606307996208368, "learning_rate": 8.475296244060454e-06, "loss": 0.6557, "step": 4339 }, { "epoch": 0.28, "grad_norm": 1.3772421509713946, "learning_rate": 8.4745509352001e-06, "loss": 0.7035, "step": 4340 }, { "epoch": 0.28, "grad_norm": 1.5305627504204304, "learning_rate": 8.473805477010848e-06, "loss": 0.7306, "step": 4341 }, { "epoch": 0.28, "grad_norm": 1.4282783266907424, "learning_rate": 8.473059869524738e-06, "loss": 0.6666, "step": 4342 }, { "epoch": 0.28, "grad_norm": 1.6861873195469945, "learning_rate": 8.472314112773813e-06, "loss": 0.6847, "step": 4343 }, { "epoch": 0.28, "grad_norm": 1.6599236945241043, "learning_rate": 8.47156820679012e-06, "loss": 0.747, "step": 4344 }, { "epoch": 0.28, "grad_norm": 1.5332851635401759, "learning_rate": 8.470822151605723e-06, "loss": 0.7977, "step": 4345 }, { "epoch": 0.28, "grad_norm": 1.2510905411632882, "learning_rate": 8.470075947252683e-06, "loss": 0.7023, "step": 4346 }, { "epoch": 0.28, "grad_norm": 1.067942734916567, "learning_rate": 8.469329593763069e-06, "loss": 0.6717, "step": 4347 }, { "epoch": 0.28, "grad_norm": 1.7290577306432477, "learning_rate": 8.46858309116896e-06, "loss": 0.7265, "step": 4348 }, { "epoch": 0.28, "grad_norm": 2.2270910571288387, "learning_rate": 8.467836439502439e-06, "loss": 0.6691, "step": 4349 }, { "epoch": 0.28, "grad_norm": 1.402874552495283, "learning_rate": 8.467089638795593e-06, "loss": 0.6711, "step": 4350 }, { "epoch": 0.28, "grad_norm": 1.4770281741811235, "learning_rate": 8.46634268908052e-06, "loss": 0.6686, "step": 4351 }, { "epoch": 0.28, "grad_norm": 1.4246145083003114, "learning_rate": 8.465595590389324e-06, "loss": 0.7456, "step": 4352 }, { "epoch": 0.28, "grad_norm": 1.6133687110650796, "learning_rate": 8.46484834275411e-06, "loss": 0.6976, "step": 4353 }, { "epoch": 0.28, "grad_norm": 1.6771774601841398, "learning_rate": 8.464100946206996e-06, "loss": 0.7435, "step": 4354 }, { "epoch": 0.28, "grad_norm": 1.4271598235196339, "learning_rate": 8.463353400780101e-06, "loss": 0.6916, "step": 4355 }, { "epoch": 0.28, "grad_norm": 1.5443714399710275, "learning_rate": 8.462605706505556e-06, "loss": 0.6846, "step": 4356 }, { "epoch": 0.28, "grad_norm": 1.7965415112953171, "learning_rate": 8.461857863415493e-06, "loss": 0.7641, "step": 4357 }, { "epoch": 0.28, "grad_norm": 1.61076022855905, "learning_rate": 8.461109871542053e-06, "loss": 0.7845, "step": 4358 }, { "epoch": 0.28, "grad_norm": 1.1766715790293745, "learning_rate": 8.460361730917384e-06, "loss": 0.579, "step": 4359 }, { "epoch": 0.28, "grad_norm": 1.5996569371659342, "learning_rate": 8.459613441573637e-06, "loss": 0.7369, "step": 4360 }, { "epoch": 0.28, "grad_norm": 1.795230645152983, "learning_rate": 8.458865003542975e-06, "loss": 0.7408, "step": 4361 }, { "epoch": 0.28, "grad_norm": 1.6261127664025725, "learning_rate": 8.458116416857565e-06, "loss": 0.703, "step": 4362 }, { "epoch": 0.28, "grad_norm": 1.8402613931093204, "learning_rate": 8.457367681549577e-06, "loss": 0.7778, "step": 4363 }, { "epoch": 0.28, "grad_norm": 1.5885671713201832, "learning_rate": 8.456618797651191e-06, "loss": 0.6762, "step": 4364 }, { "epoch": 0.28, "grad_norm": 1.4430826099705545, "learning_rate": 8.455869765194592e-06, "loss": 0.7047, "step": 4365 }, { "epoch": 0.28, "grad_norm": 1.1829338829265321, "learning_rate": 8.455120584211972e-06, "loss": 0.5505, "step": 4366 }, { "epoch": 0.28, "grad_norm": 3.062835996428377, "learning_rate": 8.45437125473553e-06, "loss": 0.6333, "step": 4367 }, { "epoch": 0.28, "grad_norm": 1.482531386415815, "learning_rate": 8.45362177679747e-06, "loss": 0.7169, "step": 4368 }, { "epoch": 0.28, "grad_norm": 1.6744069047047503, "learning_rate": 8.452872150430002e-06, "loss": 0.7213, "step": 4369 }, { "epoch": 0.28, "grad_norm": 1.4384539565601224, "learning_rate": 8.452122375665346e-06, "loss": 0.764, "step": 4370 }, { "epoch": 0.28, "grad_norm": 1.5744442404645382, "learning_rate": 8.451372452535724e-06, "loss": 0.6991, "step": 4371 }, { "epoch": 0.28, "grad_norm": 1.6346576990078765, "learning_rate": 8.450622381073367e-06, "loss": 0.756, "step": 4372 }, { "epoch": 0.28, "grad_norm": 1.7387825570705209, "learning_rate": 8.44987216131051e-06, "loss": 0.6994, "step": 4373 }, { "epoch": 0.28, "grad_norm": 1.3790073778418859, "learning_rate": 8.449121793279395e-06, "loss": 0.6295, "step": 4374 }, { "epoch": 0.28, "grad_norm": 2.1282759816303156, "learning_rate": 8.448371277012275e-06, "loss": 0.7488, "step": 4375 }, { "epoch": 0.28, "grad_norm": 1.36648377871593, "learning_rate": 8.447620612541405e-06, "loss": 0.6393, "step": 4376 }, { "epoch": 0.28, "grad_norm": 1.4800997560065128, "learning_rate": 8.446869799899042e-06, "loss": 0.6422, "step": 4377 }, { "epoch": 0.28, "grad_norm": 1.6285807147577354, "learning_rate": 8.44611883911746e-06, "loss": 0.7797, "step": 4378 }, { "epoch": 0.28, "grad_norm": 1.5405269031660989, "learning_rate": 8.44536773022893e-06, "loss": 0.6641, "step": 4379 }, { "epoch": 0.28, "grad_norm": 1.3961115522457257, "learning_rate": 8.444616473265737e-06, "loss": 0.6302, "step": 4380 }, { "epoch": 0.28, "grad_norm": 1.5887720730447046, "learning_rate": 8.443865068260164e-06, "loss": 0.6712, "step": 4381 }, { "epoch": 0.28, "grad_norm": 1.4835836496567998, "learning_rate": 8.443113515244508e-06, "loss": 0.6367, "step": 4382 }, { "epoch": 0.28, "grad_norm": 1.639657349437415, "learning_rate": 8.442361814251069e-06, "loss": 0.6585, "step": 4383 }, { "epoch": 0.28, "grad_norm": 2.395630467527593, "learning_rate": 8.44160996531215e-06, "loss": 0.7834, "step": 4384 }, { "epoch": 0.28, "grad_norm": 1.5551743027512714, "learning_rate": 8.440857968460068e-06, "loss": 0.8522, "step": 4385 }, { "epoch": 0.28, "grad_norm": 1.4716992417001358, "learning_rate": 8.440105823727143e-06, "loss": 0.7496, "step": 4386 }, { "epoch": 0.28, "grad_norm": 1.6402723258192453, "learning_rate": 8.439353531145695e-06, "loss": 0.6469, "step": 4387 }, { "epoch": 0.28, "grad_norm": 1.4088802172205863, "learning_rate": 8.43860109074806e-06, "loss": 0.7008, "step": 4388 }, { "epoch": 0.28, "grad_norm": 1.6884968508040012, "learning_rate": 8.437848502566576e-06, "loss": 0.736, "step": 4389 }, { "epoch": 0.28, "grad_norm": 1.6279992650254633, "learning_rate": 8.437095766633587e-06, "loss": 0.7272, "step": 4390 }, { "epoch": 0.28, "grad_norm": 1.2155178169403158, "learning_rate": 8.436342882981445e-06, "loss": 0.7157, "step": 4391 }, { "epoch": 0.28, "grad_norm": 1.8649179241162752, "learning_rate": 8.435589851642507e-06, "loss": 0.7149, "step": 4392 }, { "epoch": 0.28, "grad_norm": 1.4985909431294244, "learning_rate": 8.434836672649134e-06, "loss": 0.6712, "step": 4393 }, { "epoch": 0.28, "grad_norm": 2.148630451401209, "learning_rate": 8.4340833460337e-06, "loss": 0.7255, "step": 4394 }, { "epoch": 0.28, "grad_norm": 1.506635906376387, "learning_rate": 8.433329871828582e-06, "loss": 0.7531, "step": 4395 }, { "epoch": 0.28, "grad_norm": 1.7086516217657526, "learning_rate": 8.432576250066158e-06, "loss": 0.5989, "step": 4396 }, { "epoch": 0.28, "grad_norm": 1.775591443264718, "learning_rate": 8.431822480778818e-06, "loss": 0.6624, "step": 4397 }, { "epoch": 0.28, "grad_norm": 1.4934711002692904, "learning_rate": 8.431068563998962e-06, "loss": 0.6775, "step": 4398 }, { "epoch": 0.28, "grad_norm": 1.8162520793113444, "learning_rate": 8.430314499758986e-06, "loss": 0.8429, "step": 4399 }, { "epoch": 0.28, "grad_norm": 1.4066870724367269, "learning_rate": 8.429560288091305e-06, "loss": 0.6803, "step": 4400 }, { "epoch": 0.28, "grad_norm": 1.8165834796902929, "learning_rate": 8.428805929028327e-06, "loss": 0.7754, "step": 4401 }, { "epoch": 0.28, "grad_norm": 1.2740937864449426, "learning_rate": 8.428051422602475e-06, "loss": 0.7159, "step": 4402 }, { "epoch": 0.28, "grad_norm": 1.5335349561085105, "learning_rate": 8.427296768846176e-06, "loss": 0.7379, "step": 4403 }, { "epoch": 0.28, "grad_norm": 3.0510656924912536, "learning_rate": 8.426541967791863e-06, "loss": 0.6828, "step": 4404 }, { "epoch": 0.28, "grad_norm": 1.7186188589329097, "learning_rate": 8.425787019471979e-06, "loss": 0.7086, "step": 4405 }, { "epoch": 0.28, "grad_norm": 1.8481756094005277, "learning_rate": 8.425031923918964e-06, "loss": 0.7569, "step": 4406 }, { "epoch": 0.28, "grad_norm": 1.8239337161765778, "learning_rate": 8.424276681165276e-06, "loss": 0.7451, "step": 4407 }, { "epoch": 0.28, "grad_norm": 1.4811479315809097, "learning_rate": 8.42352129124337e-06, "loss": 0.737, "step": 4408 }, { "epoch": 0.28, "grad_norm": 1.7032126566593961, "learning_rate": 8.422765754185716e-06, "loss": 0.7213, "step": 4409 }, { "epoch": 0.28, "grad_norm": 1.7602686794701226, "learning_rate": 8.422010070024779e-06, "loss": 0.7332, "step": 4410 }, { "epoch": 0.28, "grad_norm": 1.5024476875867911, "learning_rate": 8.421254238793041e-06, "loss": 0.7618, "step": 4411 }, { "epoch": 0.28, "grad_norm": 1.53780868803912, "learning_rate": 8.420498260522985e-06, "loss": 0.6939, "step": 4412 }, { "epoch": 0.28, "grad_norm": 1.1726928521940851, "learning_rate": 8.419742135247099e-06, "loss": 0.6561, "step": 4413 }, { "epoch": 0.28, "grad_norm": 1.603671577429729, "learning_rate": 8.418985862997886e-06, "loss": 0.8032, "step": 4414 }, { "epoch": 0.28, "grad_norm": 1.636365646795116, "learning_rate": 8.418229443807842e-06, "loss": 0.7272, "step": 4415 }, { "epoch": 0.28, "grad_norm": 1.4161512877097715, "learning_rate": 8.417472877709479e-06, "loss": 0.642, "step": 4416 }, { "epoch": 0.28, "grad_norm": 1.6151674621484642, "learning_rate": 8.416716164735315e-06, "loss": 0.7352, "step": 4417 }, { "epoch": 0.28, "grad_norm": 2.280494760163681, "learning_rate": 8.415959304917868e-06, "loss": 0.7787, "step": 4418 }, { "epoch": 0.28, "grad_norm": 1.1776726101584434, "learning_rate": 8.415202298289668e-06, "loss": 0.683, "step": 4419 }, { "epoch": 0.28, "grad_norm": 1.6399668401014544, "learning_rate": 8.41444514488325e-06, "loss": 0.7365, "step": 4420 }, { "epoch": 0.28, "grad_norm": 1.8922897553120532, "learning_rate": 8.413687844731155e-06, "loss": 0.728, "step": 4421 }, { "epoch": 0.28, "grad_norm": 1.8524288852490907, "learning_rate": 8.41293039786593e-06, "loss": 0.7228, "step": 4422 }, { "epoch": 0.28, "grad_norm": 1.5856629741607549, "learning_rate": 8.412172804320127e-06, "loss": 0.7549, "step": 4423 }, { "epoch": 0.28, "grad_norm": 1.5379391747676157, "learning_rate": 8.411415064126306e-06, "loss": 0.7602, "step": 4424 }, { "epoch": 0.28, "grad_norm": 1.6782193956487106, "learning_rate": 8.410657177317035e-06, "loss": 0.6957, "step": 4425 }, { "epoch": 0.28, "grad_norm": 1.2430526800473494, "learning_rate": 8.409899143924885e-06, "loss": 0.6767, "step": 4426 }, { "epoch": 0.28, "grad_norm": 1.4423749372312724, "learning_rate": 8.409140963982436e-06, "loss": 0.6322, "step": 4427 }, { "epoch": 0.28, "grad_norm": 1.6790090076143591, "learning_rate": 8.40838263752227e-06, "loss": 0.7156, "step": 4428 }, { "epoch": 0.28, "grad_norm": 1.8322434017936093, "learning_rate": 8.407624164576982e-06, "loss": 0.8421, "step": 4429 }, { "epoch": 0.28, "grad_norm": 1.1506370727076813, "learning_rate": 8.406865545179165e-06, "loss": 0.7148, "step": 4430 }, { "epoch": 0.28, "grad_norm": 1.086493038414303, "learning_rate": 8.406106779361429e-06, "loss": 0.6761, "step": 4431 }, { "epoch": 0.28, "grad_norm": 1.7326754201007266, "learning_rate": 8.405347867156379e-06, "loss": 0.7252, "step": 4432 }, { "epoch": 0.28, "grad_norm": 1.7476981884456517, "learning_rate": 8.404588808596635e-06, "loss": 0.7971, "step": 4433 }, { "epoch": 0.28, "grad_norm": 1.6817822373650357, "learning_rate": 8.403829603714817e-06, "loss": 0.7596, "step": 4434 }, { "epoch": 0.28, "grad_norm": 1.5407475166271472, "learning_rate": 8.403070252543555e-06, "loss": 0.7389, "step": 4435 }, { "epoch": 0.28, "grad_norm": 2.129937095120733, "learning_rate": 8.402310755115483e-06, "loss": 0.7371, "step": 4436 }, { "epoch": 0.28, "grad_norm": 1.322159151889036, "learning_rate": 8.401551111463246e-06, "loss": 0.626, "step": 4437 }, { "epoch": 0.28, "grad_norm": 1.3836857695382931, "learning_rate": 8.400791321619489e-06, "loss": 0.6637, "step": 4438 }, { "epoch": 0.28, "grad_norm": 1.529066548092615, "learning_rate": 8.400031385616868e-06, "loss": 0.7138, "step": 4439 }, { "epoch": 0.28, "grad_norm": 1.5085886843329737, "learning_rate": 8.399271303488041e-06, "loss": 0.8241, "step": 4440 }, { "epoch": 0.28, "grad_norm": 1.5315505053591594, "learning_rate": 8.398511075265677e-06, "loss": 0.7091, "step": 4441 }, { "epoch": 0.28, "grad_norm": 1.5654007237366938, "learning_rate": 8.397750700982449e-06, "loss": 0.6701, "step": 4442 }, { "epoch": 0.28, "grad_norm": 1.2013720807881332, "learning_rate": 8.396990180671034e-06, "loss": 0.6636, "step": 4443 }, { "epoch": 0.28, "grad_norm": 1.5870798965526516, "learning_rate": 8.39622951436412e-06, "loss": 0.6788, "step": 4444 }, { "epoch": 0.28, "grad_norm": 1.6375130460640441, "learning_rate": 8.395468702094399e-06, "loss": 0.7417, "step": 4445 }, { "epoch": 0.28, "grad_norm": 1.7573472679584732, "learning_rate": 8.394707743894565e-06, "loss": 0.7149, "step": 4446 }, { "epoch": 0.28, "grad_norm": 1.6151868191125975, "learning_rate": 8.393946639797328e-06, "loss": 0.774, "step": 4447 }, { "epoch": 0.28, "grad_norm": 1.174698559504571, "learning_rate": 8.393185389835396e-06, "loss": 0.7076, "step": 4448 }, { "epoch": 0.28, "grad_norm": 1.7369530453175843, "learning_rate": 8.392423994041486e-06, "loss": 0.7098, "step": 4449 }, { "epoch": 0.28, "grad_norm": 1.716519229002518, "learning_rate": 8.39166245244832e-06, "loss": 0.668, "step": 4450 }, { "epoch": 0.28, "grad_norm": 1.5196218079176476, "learning_rate": 8.39090076508863e-06, "loss": 0.7254, "step": 4451 }, { "epoch": 0.28, "grad_norm": 1.9901226361240907, "learning_rate": 8.390138931995148e-06, "loss": 0.8123, "step": 4452 }, { "epoch": 0.29, "grad_norm": 1.5384783176242947, "learning_rate": 8.389376953200622e-06, "loss": 0.7248, "step": 4453 }, { "epoch": 0.29, "grad_norm": 1.6190046282667172, "learning_rate": 8.388614828737794e-06, "loss": 0.6413, "step": 4454 }, { "epoch": 0.29, "grad_norm": 1.5726751003215438, "learning_rate": 8.387852558639422e-06, "loss": 0.7286, "step": 4455 }, { "epoch": 0.29, "grad_norm": 1.589243093445895, "learning_rate": 8.387090142938264e-06, "loss": 0.6791, "step": 4456 }, { "epoch": 0.29, "grad_norm": 1.4978970001049436, "learning_rate": 8.386327581667091e-06, "loss": 0.7102, "step": 4457 }, { "epoch": 0.29, "grad_norm": 1.5261461436054395, "learning_rate": 8.385564874858674e-06, "loss": 0.6804, "step": 4458 }, { "epoch": 0.29, "grad_norm": 1.5494445529853735, "learning_rate": 8.384802022545793e-06, "loss": 0.7006, "step": 4459 }, { "epoch": 0.29, "grad_norm": 1.7191122497005347, "learning_rate": 8.384039024761233e-06, "loss": 0.6889, "step": 4460 }, { "epoch": 0.29, "grad_norm": 1.5280752128579824, "learning_rate": 8.383275881537786e-06, "loss": 0.6741, "step": 4461 }, { "epoch": 0.29, "grad_norm": 1.4107760562044525, "learning_rate": 8.382512592908251e-06, "loss": 0.7086, "step": 4462 }, { "epoch": 0.29, "grad_norm": 1.570671129840295, "learning_rate": 8.381749158905433e-06, "loss": 0.7673, "step": 4463 }, { "epoch": 0.29, "grad_norm": 1.6146304260383217, "learning_rate": 8.380985579562142e-06, "loss": 0.6528, "step": 4464 }, { "epoch": 0.29, "grad_norm": 1.57044590980453, "learning_rate": 8.380221854911195e-06, "loss": 0.7432, "step": 4465 }, { "epoch": 0.29, "grad_norm": 1.5431819931440542, "learning_rate": 8.379457984985416e-06, "loss": 0.6317, "step": 4466 }, { "epoch": 0.29, "grad_norm": 1.8052108334826016, "learning_rate": 8.378693969817633e-06, "loss": 0.7872, "step": 4467 }, { "epoch": 0.29, "grad_norm": 1.184432345515197, "learning_rate": 8.377929809440684e-06, "loss": 0.5892, "step": 4468 }, { "epoch": 0.29, "grad_norm": 1.8389886863153448, "learning_rate": 8.37716550388741e-06, "loss": 0.7311, "step": 4469 }, { "epoch": 0.29, "grad_norm": 1.5828366894320214, "learning_rate": 8.376401053190658e-06, "loss": 0.7117, "step": 4470 }, { "epoch": 0.29, "grad_norm": 2.7050677064665876, "learning_rate": 8.375636457383282e-06, "loss": 0.7022, "step": 4471 }, { "epoch": 0.29, "grad_norm": 1.7576945396423311, "learning_rate": 8.374871716498147e-06, "loss": 0.6842, "step": 4472 }, { "epoch": 0.29, "grad_norm": 1.571107661264624, "learning_rate": 8.374106830568117e-06, "loss": 0.7596, "step": 4473 }, { "epoch": 0.29, "grad_norm": 1.1025939321842733, "learning_rate": 8.373341799626065e-06, "loss": 0.7455, "step": 4474 }, { "epoch": 0.29, "grad_norm": 1.5944210628290696, "learning_rate": 8.372576623704872e-06, "loss": 0.6794, "step": 4475 }, { "epoch": 0.29, "grad_norm": 1.48078905848755, "learning_rate": 8.37181130283742e-06, "loss": 0.6398, "step": 4476 }, { "epoch": 0.29, "grad_norm": 1.4771889436687071, "learning_rate": 8.371045837056603e-06, "loss": 0.6651, "step": 4477 }, { "epoch": 0.29, "grad_norm": 1.4659398347087962, "learning_rate": 8.370280226395322e-06, "loss": 0.7244, "step": 4478 }, { "epoch": 0.29, "grad_norm": 1.6894571171525377, "learning_rate": 8.369514470886478e-06, "loss": 0.6979, "step": 4479 }, { "epoch": 0.29, "grad_norm": 1.4110952255511298, "learning_rate": 8.368748570562982e-06, "loss": 0.6031, "step": 4480 }, { "epoch": 0.29, "grad_norm": 1.9963294873318576, "learning_rate": 8.36798252545775e-06, "loss": 0.7111, "step": 4481 }, { "epoch": 0.29, "grad_norm": 1.0793892889066008, "learning_rate": 8.367216335603707e-06, "loss": 0.585, "step": 4482 }, { "epoch": 0.29, "grad_norm": 1.2670624026039614, "learning_rate": 8.366450001033784e-06, "loss": 0.6811, "step": 4483 }, { "epoch": 0.29, "grad_norm": 1.6824930330350203, "learning_rate": 8.36568352178091e-06, "loss": 0.6776, "step": 4484 }, { "epoch": 0.29, "grad_norm": 2.2105843131493375, "learning_rate": 8.364916897878033e-06, "loss": 0.808, "step": 4485 }, { "epoch": 0.29, "grad_norm": 1.4834343216232633, "learning_rate": 8.364150129358098e-06, "loss": 0.6502, "step": 4486 }, { "epoch": 0.29, "grad_norm": 1.3112666820869805, "learning_rate": 8.363383216254058e-06, "loss": 0.6754, "step": 4487 }, { "epoch": 0.29, "grad_norm": 1.2496962218188945, "learning_rate": 8.362616158598875e-06, "loss": 0.7522, "step": 4488 }, { "epoch": 0.29, "grad_norm": 1.9433430800104916, "learning_rate": 8.361848956425516e-06, "loss": 0.7581, "step": 4489 }, { "epoch": 0.29, "grad_norm": 1.336721335582212, "learning_rate": 8.361081609766954e-06, "loss": 0.6827, "step": 4490 }, { "epoch": 0.29, "grad_norm": 1.5182428516435484, "learning_rate": 8.360314118656165e-06, "loss": 0.6737, "step": 4491 }, { "epoch": 0.29, "grad_norm": 2.016744408801684, "learning_rate": 8.359546483126137e-06, "loss": 0.8235, "step": 4492 }, { "epoch": 0.29, "grad_norm": 1.7621047775651626, "learning_rate": 8.358778703209862e-06, "loss": 0.7426, "step": 4493 }, { "epoch": 0.29, "grad_norm": 1.2614117845860597, "learning_rate": 8.358010778940336e-06, "loss": 0.7006, "step": 4494 }, { "epoch": 0.29, "grad_norm": 1.6540234279450736, "learning_rate": 8.357242710350561e-06, "loss": 0.678, "step": 4495 }, { "epoch": 0.29, "grad_norm": 1.7690512665422384, "learning_rate": 8.35647449747355e-06, "loss": 0.7192, "step": 4496 }, { "epoch": 0.29, "grad_norm": 1.5359176153091405, "learning_rate": 8.355706140342317e-06, "loss": 0.7322, "step": 4497 }, { "epoch": 0.29, "grad_norm": 1.5538023016010025, "learning_rate": 8.354937638989887e-06, "loss": 0.735, "step": 4498 }, { "epoch": 0.29, "grad_norm": 1.588083844413315, "learning_rate": 8.354168993449285e-06, "loss": 0.736, "step": 4499 }, { "epoch": 0.29, "grad_norm": 1.7579122539916423, "learning_rate": 8.35340020375355e-06, "loss": 0.8424, "step": 4500 }, { "epoch": 0.29, "grad_norm": 1.649956906910062, "learning_rate": 8.352631269935719e-06, "loss": 0.6719, "step": 4501 }, { "epoch": 0.29, "grad_norm": 1.1893350535448748, "learning_rate": 8.351862192028842e-06, "loss": 0.662, "step": 4502 }, { "epoch": 0.29, "grad_norm": 1.5059762511870545, "learning_rate": 8.35109297006597e-06, "loss": 0.7642, "step": 4503 }, { "epoch": 0.29, "grad_norm": 1.8049844022290624, "learning_rate": 8.350323604080166e-06, "loss": 0.7339, "step": 4504 }, { "epoch": 0.29, "grad_norm": 1.576555812461427, "learning_rate": 8.349554094104491e-06, "loss": 0.7296, "step": 4505 }, { "epoch": 0.29, "grad_norm": 1.4663514470936236, "learning_rate": 8.34878444017202e-06, "loss": 0.6689, "step": 4506 }, { "epoch": 0.29, "grad_norm": 1.6197132922060713, "learning_rate": 8.348014642315831e-06, "loss": 0.7449, "step": 4507 }, { "epoch": 0.29, "grad_norm": 1.4155150810701533, "learning_rate": 8.347244700569008e-06, "loss": 0.6862, "step": 4508 }, { "epoch": 0.29, "grad_norm": 1.6430900424679729, "learning_rate": 8.346474614964642e-06, "loss": 0.7656, "step": 4509 }, { "epoch": 0.29, "grad_norm": 1.6137725212227927, "learning_rate": 8.345704385535826e-06, "loss": 0.6472, "step": 4510 }, { "epoch": 0.29, "grad_norm": 1.710147928488427, "learning_rate": 8.34493401231567e-06, "loss": 0.7447, "step": 4511 }, { "epoch": 0.29, "grad_norm": 1.5443401579092917, "learning_rate": 8.344163495337276e-06, "loss": 0.6656, "step": 4512 }, { "epoch": 0.29, "grad_norm": 1.4319323766853946, "learning_rate": 8.34339283463376e-06, "loss": 0.652, "step": 4513 }, { "epoch": 0.29, "grad_norm": 1.847425837354819, "learning_rate": 8.34262203023825e-06, "loss": 0.6134, "step": 4514 }, { "epoch": 0.29, "grad_norm": 1.1398161619278855, "learning_rate": 8.341851082183868e-06, "loss": 0.5575, "step": 4515 }, { "epoch": 0.29, "grad_norm": 1.2260348584078338, "learning_rate": 8.341079990503747e-06, "loss": 0.6873, "step": 4516 }, { "epoch": 0.29, "grad_norm": 1.5902371713825108, "learning_rate": 8.340308755231027e-06, "loss": 0.7026, "step": 4517 }, { "epoch": 0.29, "grad_norm": 1.2129922382115108, "learning_rate": 8.339537376398858e-06, "loss": 0.7011, "step": 4518 }, { "epoch": 0.29, "grad_norm": 1.9526575425739632, "learning_rate": 8.338765854040391e-06, "loss": 0.7044, "step": 4519 }, { "epoch": 0.29, "grad_norm": 2.2285489222037724, "learning_rate": 8.337994188188783e-06, "loss": 0.6778, "step": 4520 }, { "epoch": 0.29, "grad_norm": 1.6540287495800343, "learning_rate": 8.337222378877196e-06, "loss": 0.6939, "step": 4521 }, { "epoch": 0.29, "grad_norm": 1.4478755788584612, "learning_rate": 8.336450426138807e-06, "loss": 0.7032, "step": 4522 }, { "epoch": 0.29, "grad_norm": 1.5629487433592126, "learning_rate": 8.33567833000679e-06, "loss": 0.6581, "step": 4523 }, { "epoch": 0.29, "grad_norm": 3.2992299607606035, "learning_rate": 8.334906090514324e-06, "loss": 0.6723, "step": 4524 }, { "epoch": 0.29, "grad_norm": 1.4746464529235839, "learning_rate": 8.334133707694603e-06, "loss": 0.7253, "step": 4525 }, { "epoch": 0.29, "grad_norm": 1.5824962020039257, "learning_rate": 8.333361181580822e-06, "loss": 0.68, "step": 4526 }, { "epoch": 0.29, "grad_norm": 1.7197222520665296, "learning_rate": 8.33258851220618e-06, "loss": 0.7383, "step": 4527 }, { "epoch": 0.29, "grad_norm": 1.5896990787643444, "learning_rate": 8.33181569960389e-06, "loss": 0.7507, "step": 4528 }, { "epoch": 0.29, "grad_norm": 1.46323200005544, "learning_rate": 8.33104274380716e-06, "loss": 0.735, "step": 4529 }, { "epoch": 0.29, "grad_norm": 2.664925494758608, "learning_rate": 8.330269644849214e-06, "loss": 0.7322, "step": 4530 }, { "epoch": 0.29, "grad_norm": 1.6481811833642517, "learning_rate": 8.329496402763275e-06, "loss": 0.7386, "step": 4531 }, { "epoch": 0.29, "grad_norm": 1.6264692790930415, "learning_rate": 8.328723017582576e-06, "loss": 0.7665, "step": 4532 }, { "epoch": 0.29, "grad_norm": 2.601619008461621, "learning_rate": 8.327949489340359e-06, "loss": 0.8079, "step": 4533 }, { "epoch": 0.29, "grad_norm": 1.3349311343147559, "learning_rate": 8.327175818069864e-06, "loss": 0.6135, "step": 4534 }, { "epoch": 0.29, "grad_norm": 1.701523677870359, "learning_rate": 8.326402003804344e-06, "loss": 0.7346, "step": 4535 }, { "epoch": 0.29, "grad_norm": 1.2275073128081588, "learning_rate": 8.325628046577055e-06, "loss": 0.75, "step": 4536 }, { "epoch": 0.29, "grad_norm": 1.1848903284692873, "learning_rate": 8.324853946421261e-06, "loss": 0.6785, "step": 4537 }, { "epoch": 0.29, "grad_norm": 1.5936949994583451, "learning_rate": 8.324079703370232e-06, "loss": 0.7287, "step": 4538 }, { "epoch": 0.29, "grad_norm": 1.8005565434128996, "learning_rate": 8.323305317457241e-06, "loss": 0.7979, "step": 4539 }, { "epoch": 0.29, "grad_norm": 1.5273311429348793, "learning_rate": 8.32253078871557e-06, "loss": 0.6854, "step": 4540 }, { "epoch": 0.29, "grad_norm": 2.013726527883508, "learning_rate": 8.32175611717851e-06, "loss": 0.7491, "step": 4541 }, { "epoch": 0.29, "grad_norm": 1.5493628958779095, "learning_rate": 8.32098130287935e-06, "loss": 0.6614, "step": 4542 }, { "epoch": 0.29, "grad_norm": 1.6337641113269077, "learning_rate": 8.320206345851393e-06, "loss": 0.7166, "step": 4543 }, { "epoch": 0.29, "grad_norm": 1.1054338735002331, "learning_rate": 8.319431246127942e-06, "loss": 0.6538, "step": 4544 }, { "epoch": 0.29, "grad_norm": 1.195030837315327, "learning_rate": 8.318656003742314e-06, "loss": 0.655, "step": 4545 }, { "epoch": 0.29, "grad_norm": 1.8360385449029093, "learning_rate": 8.317880618727821e-06, "loss": 0.6978, "step": 4546 }, { "epoch": 0.29, "grad_norm": 1.456550829429107, "learning_rate": 8.317105091117795e-06, "loss": 0.6911, "step": 4547 }, { "epoch": 0.29, "grad_norm": 1.5803551567116745, "learning_rate": 8.316329420945559e-06, "loss": 0.7639, "step": 4548 }, { "epoch": 0.29, "grad_norm": 1.638030272236825, "learning_rate": 8.315553608244453e-06, "loss": 0.7189, "step": 4549 }, { "epoch": 0.29, "grad_norm": 1.2335896930318966, "learning_rate": 8.314777653047822e-06, "loss": 0.7221, "step": 4550 }, { "epoch": 0.29, "grad_norm": 1.5511790605077358, "learning_rate": 8.314001555389014e-06, "loss": 0.7537, "step": 4551 }, { "epoch": 0.29, "grad_norm": 1.084556439120142, "learning_rate": 8.31322531530138e-06, "loss": 0.7142, "step": 4552 }, { "epoch": 0.29, "grad_norm": 1.6978751086607091, "learning_rate": 8.312448932818284e-06, "loss": 0.8205, "step": 4553 }, { "epoch": 0.29, "grad_norm": 1.373587634242961, "learning_rate": 8.311672407973093e-06, "loss": 0.6868, "step": 4554 }, { "epoch": 0.29, "grad_norm": 1.5915923455803913, "learning_rate": 8.310895740799181e-06, "loss": 0.6071, "step": 4555 }, { "epoch": 0.29, "grad_norm": 1.3186223140697002, "learning_rate": 8.310118931329928e-06, "loss": 0.6941, "step": 4556 }, { "epoch": 0.29, "grad_norm": 1.782792812616765, "learning_rate": 8.30934197959872e-06, "loss": 0.7808, "step": 4557 }, { "epoch": 0.29, "grad_norm": 1.6097122870512508, "learning_rate": 8.308564885638946e-06, "loss": 0.6949, "step": 4558 }, { "epoch": 0.29, "grad_norm": 1.436128804244658, "learning_rate": 8.307787649484005e-06, "loss": 0.6891, "step": 4559 }, { "epoch": 0.29, "grad_norm": 1.6404300835844117, "learning_rate": 8.307010271167302e-06, "loss": 0.779, "step": 4560 }, { "epoch": 0.29, "grad_norm": 3.1027135566621142, "learning_rate": 8.306232750722248e-06, "loss": 0.7221, "step": 4561 }, { "epoch": 0.29, "grad_norm": 1.349353753428857, "learning_rate": 8.305455088182256e-06, "loss": 0.7119, "step": 4562 }, { "epoch": 0.29, "grad_norm": 1.57673056476385, "learning_rate": 8.30467728358075e-06, "loss": 0.7178, "step": 4563 }, { "epoch": 0.29, "grad_norm": 1.5710672175801172, "learning_rate": 8.303899336951157e-06, "loss": 0.7935, "step": 4564 }, { "epoch": 0.29, "grad_norm": 1.5592738477655261, "learning_rate": 8.303121248326917e-06, "loss": 0.7309, "step": 4565 }, { "epoch": 0.29, "grad_norm": 1.5210812095057116, "learning_rate": 8.302343017741464e-06, "loss": 0.7469, "step": 4566 }, { "epoch": 0.29, "grad_norm": 1.609763181642832, "learning_rate": 8.301564645228249e-06, "loss": 0.784, "step": 4567 }, { "epoch": 0.29, "grad_norm": 1.4342642202795104, "learning_rate": 8.30078613082072e-06, "loss": 0.6185, "step": 4568 }, { "epoch": 0.29, "grad_norm": 1.5841317307068812, "learning_rate": 8.300007474552343e-06, "loss": 0.6642, "step": 4569 }, { "epoch": 0.29, "grad_norm": 1.7825372569891473, "learning_rate": 8.299228676456575e-06, "loss": 0.7303, "step": 4570 }, { "epoch": 0.29, "grad_norm": 1.450285696027249, "learning_rate": 8.298449736566894e-06, "loss": 0.7604, "step": 4571 }, { "epoch": 0.29, "grad_norm": 1.384012491458053, "learning_rate": 8.297670654916772e-06, "loss": 0.7448, "step": 4572 }, { "epoch": 0.29, "grad_norm": 1.1885833581642244, "learning_rate": 8.296891431539696e-06, "loss": 0.6213, "step": 4573 }, { "epoch": 0.29, "grad_norm": 1.1666616318435947, "learning_rate": 8.296112066469153e-06, "loss": 0.6448, "step": 4574 }, { "epoch": 0.29, "grad_norm": 1.4997922531168042, "learning_rate": 8.29533255973864e-06, "loss": 0.7416, "step": 4575 }, { "epoch": 0.29, "grad_norm": 1.5572288996309591, "learning_rate": 8.29455291138166e-06, "loss": 0.7114, "step": 4576 }, { "epoch": 0.29, "grad_norm": 1.616966687953736, "learning_rate": 8.293773121431717e-06, "loss": 0.7872, "step": 4577 }, { "epoch": 0.29, "grad_norm": 1.533940066696176, "learning_rate": 8.292993189922326e-06, "loss": 0.7232, "step": 4578 }, { "epoch": 0.29, "grad_norm": 1.628024516421376, "learning_rate": 8.292213116887008e-06, "loss": 0.7183, "step": 4579 }, { "epoch": 0.29, "grad_norm": 1.6715414028457545, "learning_rate": 8.291432902359289e-06, "loss": 0.6772, "step": 4580 }, { "epoch": 0.29, "grad_norm": 1.4434500025325356, "learning_rate": 8.290652546372698e-06, "loss": 0.6461, "step": 4581 }, { "epoch": 0.29, "grad_norm": 1.4308345698700087, "learning_rate": 8.289872048960776e-06, "loss": 0.6347, "step": 4582 }, { "epoch": 0.29, "grad_norm": 1.5147734526289351, "learning_rate": 8.289091410157067e-06, "loss": 0.6219, "step": 4583 }, { "epoch": 0.29, "grad_norm": 1.05209403509706, "learning_rate": 8.288310629995119e-06, "loss": 0.8636, "step": 4584 }, { "epoch": 0.29, "grad_norm": 1.15049359358303, "learning_rate": 8.28752970850849e-06, "loss": 0.6303, "step": 4585 }, { "epoch": 0.29, "grad_norm": 1.8278588222685745, "learning_rate": 8.286748645730744e-06, "loss": 0.6687, "step": 4586 }, { "epoch": 0.29, "grad_norm": 1.892139077786448, "learning_rate": 8.285967441695445e-06, "loss": 0.7341, "step": 4587 }, { "epoch": 0.29, "grad_norm": 1.783811430098018, "learning_rate": 8.285186096436173e-06, "loss": 0.731, "step": 4588 }, { "epoch": 0.29, "grad_norm": 1.4191401616017336, "learning_rate": 8.284404609986505e-06, "loss": 0.6417, "step": 4589 }, { "epoch": 0.29, "grad_norm": 1.5754619435484796, "learning_rate": 8.283622982380027e-06, "loss": 0.7401, "step": 4590 }, { "epoch": 0.29, "grad_norm": 1.8913976888209505, "learning_rate": 8.282841213650334e-06, "loss": 0.7512, "step": 4591 }, { "epoch": 0.29, "grad_norm": 1.774278883533196, "learning_rate": 8.282059303831022e-06, "loss": 0.7788, "step": 4592 }, { "epoch": 0.29, "grad_norm": 1.7397368522061016, "learning_rate": 8.281277252955699e-06, "loss": 0.7122, "step": 4593 }, { "epoch": 0.29, "grad_norm": 1.8019376964241984, "learning_rate": 8.280495061057976e-06, "loss": 0.7629, "step": 4594 }, { "epoch": 0.29, "grad_norm": 1.9356623114541698, "learning_rate": 8.279712728171468e-06, "loss": 0.796, "step": 4595 }, { "epoch": 0.29, "grad_norm": 1.5998017669116897, "learning_rate": 8.278930254329798e-06, "loss": 0.7479, "step": 4596 }, { "epoch": 0.29, "grad_norm": 1.4987572310675552, "learning_rate": 8.278147639566596e-06, "loss": 0.7581, "step": 4597 }, { "epoch": 0.29, "grad_norm": 1.7199872224390458, "learning_rate": 8.277364883915496e-06, "loss": 0.6892, "step": 4598 }, { "epoch": 0.29, "grad_norm": 1.464241229305452, "learning_rate": 8.27658198741014e-06, "loss": 0.7067, "step": 4599 }, { "epoch": 0.29, "grad_norm": 2.1779481162098957, "learning_rate": 8.275798950084176e-06, "loss": 0.6953, "step": 4600 }, { "epoch": 0.29, "grad_norm": 1.4290971431203243, "learning_rate": 8.275015771971255e-06, "loss": 0.6414, "step": 4601 }, { "epoch": 0.29, "grad_norm": 1.859448087274376, "learning_rate": 8.27423245310504e-06, "loss": 0.7097, "step": 4602 }, { "epoch": 0.29, "grad_norm": 1.9939611613726071, "learning_rate": 8.273448993519194e-06, "loss": 0.7048, "step": 4603 }, { "epoch": 0.29, "grad_norm": 1.4596313591262806, "learning_rate": 8.272665393247388e-06, "loss": 0.6922, "step": 4604 }, { "epoch": 0.29, "grad_norm": 1.5393258080875607, "learning_rate": 8.2718816523233e-06, "loss": 0.7362, "step": 4605 }, { "epoch": 0.29, "grad_norm": 1.451996504364706, "learning_rate": 8.271097770780613e-06, "loss": 0.6742, "step": 4606 }, { "epoch": 0.29, "grad_norm": 1.4401405111374175, "learning_rate": 8.270313748653018e-06, "loss": 0.812, "step": 4607 }, { "epoch": 0.29, "grad_norm": 1.4444436706835622, "learning_rate": 8.269529585974212e-06, "loss": 0.7129, "step": 4608 }, { "epoch": 0.3, "grad_norm": 1.5250729699567702, "learning_rate": 8.268745282777893e-06, "loss": 0.7273, "step": 4609 }, { "epoch": 0.3, "grad_norm": 1.6418146888041434, "learning_rate": 8.267960839097768e-06, "loss": 0.732, "step": 4610 }, { "epoch": 0.3, "grad_norm": 1.5005790997576998, "learning_rate": 8.267176254967556e-06, "loss": 0.6877, "step": 4611 }, { "epoch": 0.3, "grad_norm": 1.3348169658466142, "learning_rate": 8.266391530420974e-06, "loss": 0.6531, "step": 4612 }, { "epoch": 0.3, "grad_norm": 1.7080098599409397, "learning_rate": 8.265606665491746e-06, "loss": 0.6886, "step": 4613 }, { "epoch": 0.3, "grad_norm": 1.5390487731640545, "learning_rate": 8.264821660213607e-06, "loss": 0.6428, "step": 4614 }, { "epoch": 0.3, "grad_norm": 1.9894395820073034, "learning_rate": 8.264036514620292e-06, "loss": 0.7696, "step": 4615 }, { "epoch": 0.3, "grad_norm": 1.5605217101085311, "learning_rate": 8.263251228745547e-06, "loss": 0.7625, "step": 4616 }, { "epoch": 0.3, "grad_norm": 1.7807842096043205, "learning_rate": 8.262465802623122e-06, "loss": 0.6968, "step": 4617 }, { "epoch": 0.3, "grad_norm": 1.4937589223936785, "learning_rate": 8.26168023628677e-06, "loss": 0.5718, "step": 4618 }, { "epoch": 0.3, "grad_norm": 1.5848840709648737, "learning_rate": 8.260894529770258e-06, "loss": 0.8117, "step": 4619 }, { "epoch": 0.3, "grad_norm": 1.620241361259445, "learning_rate": 8.260108683107348e-06, "loss": 0.7649, "step": 4620 }, { "epoch": 0.3, "grad_norm": 2.299952564513583, "learning_rate": 8.25932269633182e-06, "loss": 0.7377, "step": 4621 }, { "epoch": 0.3, "grad_norm": 1.5021759271873811, "learning_rate": 8.258536569477451e-06, "loss": 0.738, "step": 4622 }, { "epoch": 0.3, "grad_norm": 0.9752382044548858, "learning_rate": 8.257750302578027e-06, "loss": 0.5815, "step": 4623 }, { "epoch": 0.3, "grad_norm": 1.487576559366301, "learning_rate": 8.256963895667339e-06, "loss": 0.7096, "step": 4624 }, { "epoch": 0.3, "grad_norm": 1.9643367643318594, "learning_rate": 8.256177348779188e-06, "loss": 0.7476, "step": 4625 }, { "epoch": 0.3, "grad_norm": 1.7388137508132957, "learning_rate": 8.255390661947376e-06, "loss": 0.739, "step": 4626 }, { "epoch": 0.3, "grad_norm": 1.7069787208975926, "learning_rate": 8.254603835205715e-06, "loss": 0.8193, "step": 4627 }, { "epoch": 0.3, "grad_norm": 1.699512212290823, "learning_rate": 8.253816868588019e-06, "loss": 0.7241, "step": 4628 }, { "epoch": 0.3, "grad_norm": 2.0134676954034996, "learning_rate": 8.253029762128111e-06, "loss": 0.7539, "step": 4629 }, { "epoch": 0.3, "grad_norm": 1.7394626279877672, "learning_rate": 8.252242515859821e-06, "loss": 0.7237, "step": 4630 }, { "epoch": 0.3, "grad_norm": 1.2611812133081952, "learning_rate": 8.25145512981698e-06, "loss": 0.7222, "step": 4631 }, { "epoch": 0.3, "grad_norm": 1.524409324120508, "learning_rate": 8.250667604033432e-06, "loss": 0.7141, "step": 4632 }, { "epoch": 0.3, "grad_norm": 1.6116507855195996, "learning_rate": 8.249879938543017e-06, "loss": 0.7352, "step": 4633 }, { "epoch": 0.3, "grad_norm": 1.7655955386227176, "learning_rate": 8.249092133379593e-06, "loss": 0.6397, "step": 4634 }, { "epoch": 0.3, "grad_norm": 1.3525861588995596, "learning_rate": 8.248304188577018e-06, "loss": 0.7063, "step": 4635 }, { "epoch": 0.3, "grad_norm": 1.648220027815366, "learning_rate": 8.247516104169153e-06, "loss": 0.7488, "step": 4636 }, { "epoch": 0.3, "grad_norm": 1.4615475792329837, "learning_rate": 8.24672788018987e-06, "loss": 0.6489, "step": 4637 }, { "epoch": 0.3, "grad_norm": 1.4361459357993158, "learning_rate": 8.245939516673045e-06, "loss": 0.7099, "step": 4638 }, { "epoch": 0.3, "grad_norm": 1.3838680988148353, "learning_rate": 8.245151013652561e-06, "loss": 0.7338, "step": 4639 }, { "epoch": 0.3, "grad_norm": 1.737261913463298, "learning_rate": 8.244362371162305e-06, "loss": 0.5556, "step": 4640 }, { "epoch": 0.3, "grad_norm": 1.4124457157982726, "learning_rate": 8.24357358923617e-06, "loss": 0.6235, "step": 4641 }, { "epoch": 0.3, "grad_norm": 1.6784823686068842, "learning_rate": 8.242784667908062e-06, "loss": 0.8061, "step": 4642 }, { "epoch": 0.3, "grad_norm": 1.3415540677849405, "learning_rate": 8.241995607211878e-06, "loss": 0.727, "step": 4643 }, { "epoch": 0.3, "grad_norm": 1.4960162404538124, "learning_rate": 8.24120640718154e-06, "loss": 0.7415, "step": 4644 }, { "epoch": 0.3, "grad_norm": 1.3719464257365144, "learning_rate": 8.240417067850957e-06, "loss": 0.7067, "step": 4645 }, { "epoch": 0.3, "grad_norm": 1.5696666687341967, "learning_rate": 8.23962758925406e-06, "loss": 0.7323, "step": 4646 }, { "epoch": 0.3, "grad_norm": 1.3896732740050686, "learning_rate": 8.238837971424776e-06, "loss": 0.6449, "step": 4647 }, { "epoch": 0.3, "grad_norm": 1.2342989524408199, "learning_rate": 8.23804821439704e-06, "loss": 0.593, "step": 4648 }, { "epoch": 0.3, "grad_norm": 1.331893302812865, "learning_rate": 8.2372583182048e-06, "loss": 0.6996, "step": 4649 }, { "epoch": 0.3, "grad_norm": 1.4957684055722904, "learning_rate": 8.236468282881997e-06, "loss": 0.6386, "step": 4650 }, { "epoch": 0.3, "grad_norm": 1.024377809485346, "learning_rate": 8.235678108462589e-06, "loss": 0.6192, "step": 4651 }, { "epoch": 0.3, "grad_norm": 1.6374786450943677, "learning_rate": 8.234887794980532e-06, "loss": 0.8127, "step": 4652 }, { "epoch": 0.3, "grad_norm": 1.5887892994215247, "learning_rate": 8.2340973424698e-06, "loss": 0.7488, "step": 4653 }, { "epoch": 0.3, "grad_norm": 1.5779465086149198, "learning_rate": 8.233306750964357e-06, "loss": 0.7597, "step": 4654 }, { "epoch": 0.3, "grad_norm": 1.69703644334097, "learning_rate": 8.232516020498184e-06, "loss": 0.6229, "step": 4655 }, { "epoch": 0.3, "grad_norm": 1.6811464701117027, "learning_rate": 8.231725151105265e-06, "loss": 0.7693, "step": 4656 }, { "epoch": 0.3, "grad_norm": 1.4276385704443117, "learning_rate": 8.230934142819588e-06, "loss": 0.7259, "step": 4657 }, { "epoch": 0.3, "grad_norm": 1.6272569379934712, "learning_rate": 8.230142995675155e-06, "loss": 0.7144, "step": 4658 }, { "epoch": 0.3, "grad_norm": 2.036382480863166, "learning_rate": 8.229351709705961e-06, "loss": 0.859, "step": 4659 }, { "epoch": 0.3, "grad_norm": 1.6293151612156076, "learning_rate": 8.228560284946015e-06, "loss": 0.6881, "step": 4660 }, { "epoch": 0.3, "grad_norm": 1.4044611860434566, "learning_rate": 8.227768721429334e-06, "loss": 0.6946, "step": 4661 }, { "epoch": 0.3, "grad_norm": 1.32659091270658, "learning_rate": 8.226977019189936e-06, "loss": 0.703, "step": 4662 }, { "epoch": 0.3, "grad_norm": 1.337717886678058, "learning_rate": 8.226185178261846e-06, "loss": 0.6128, "step": 4663 }, { "epoch": 0.3, "grad_norm": 1.7054813628716081, "learning_rate": 8.225393198679096e-06, "loss": 0.7642, "step": 4664 }, { "epoch": 0.3, "grad_norm": 1.341550303922333, "learning_rate": 8.224601080475723e-06, "loss": 0.662, "step": 4665 }, { "epoch": 0.3, "grad_norm": 1.525700036198287, "learning_rate": 8.223808823685773e-06, "loss": 0.7903, "step": 4666 }, { "epoch": 0.3, "grad_norm": 1.4572086899031416, "learning_rate": 8.223016428343294e-06, "loss": 0.8071, "step": 4667 }, { "epoch": 0.3, "grad_norm": 1.4223643139761832, "learning_rate": 8.222223894482339e-06, "loss": 0.6996, "step": 4668 }, { "epoch": 0.3, "grad_norm": 1.580272415430593, "learning_rate": 8.221431222136976e-06, "loss": 0.804, "step": 4669 }, { "epoch": 0.3, "grad_norm": 1.4736273227737484, "learning_rate": 8.220638411341264e-06, "loss": 0.747, "step": 4670 }, { "epoch": 0.3, "grad_norm": 1.5128788082515081, "learning_rate": 8.219845462129284e-06, "loss": 0.7215, "step": 4671 }, { "epoch": 0.3, "grad_norm": 1.5095558059459167, "learning_rate": 8.219052374535109e-06, "loss": 0.6754, "step": 4672 }, { "epoch": 0.3, "grad_norm": 1.5607992089032856, "learning_rate": 8.218259148592828e-06, "loss": 0.6968, "step": 4673 }, { "epoch": 0.3, "grad_norm": 1.5756876751439042, "learning_rate": 8.21746578433653e-06, "loss": 0.6506, "step": 4674 }, { "epoch": 0.3, "grad_norm": 1.5863856617565568, "learning_rate": 8.216672281800317e-06, "loss": 0.7265, "step": 4675 }, { "epoch": 0.3, "grad_norm": 1.6785905119217823, "learning_rate": 8.215878641018287e-06, "loss": 0.7077, "step": 4676 }, { "epoch": 0.3, "grad_norm": 1.587396079930141, "learning_rate": 8.21508486202455e-06, "loss": 0.762, "step": 4677 }, { "epoch": 0.3, "grad_norm": 1.5185738497717238, "learning_rate": 8.214290944853221e-06, "loss": 0.6965, "step": 4678 }, { "epoch": 0.3, "grad_norm": 1.52105889700507, "learning_rate": 8.213496889538422e-06, "loss": 0.7514, "step": 4679 }, { "epoch": 0.3, "grad_norm": 1.7837078500803258, "learning_rate": 8.212702696114279e-06, "loss": 0.7427, "step": 4680 }, { "epoch": 0.3, "grad_norm": 1.8052388450134114, "learning_rate": 8.211908364614924e-06, "loss": 0.796, "step": 4681 }, { "epoch": 0.3, "grad_norm": 1.5291930838859693, "learning_rate": 8.211113895074498e-06, "loss": 0.7167, "step": 4682 }, { "epoch": 0.3, "grad_norm": 1.6487807267368255, "learning_rate": 8.210319287527143e-06, "loss": 0.8, "step": 4683 }, { "epoch": 0.3, "grad_norm": 1.3474007714580987, "learning_rate": 8.209524542007012e-06, "loss": 0.5931, "step": 4684 }, { "epoch": 0.3, "grad_norm": 1.5298200445281753, "learning_rate": 8.20872965854826e-06, "loss": 0.7836, "step": 4685 }, { "epoch": 0.3, "grad_norm": 1.0503927123148238, "learning_rate": 8.207934637185049e-06, "loss": 0.6656, "step": 4686 }, { "epoch": 0.3, "grad_norm": 1.5217445151066487, "learning_rate": 8.207139477951549e-06, "loss": 0.7212, "step": 4687 }, { "epoch": 0.3, "grad_norm": 1.677470635699017, "learning_rate": 8.206344180881933e-06, "loss": 0.7064, "step": 4688 }, { "epoch": 0.3, "grad_norm": 1.6086857946329292, "learning_rate": 8.205548746010383e-06, "loss": 0.6525, "step": 4689 }, { "epoch": 0.3, "grad_norm": 1.4938277784620468, "learning_rate": 8.204753173371081e-06, "loss": 0.6807, "step": 4690 }, { "epoch": 0.3, "grad_norm": 1.6237507315952973, "learning_rate": 8.203957462998225e-06, "loss": 0.7604, "step": 4691 }, { "epoch": 0.3, "grad_norm": 1.7088289886614518, "learning_rate": 8.203161614926007e-06, "loss": 0.7272, "step": 4692 }, { "epoch": 0.3, "grad_norm": 1.5660625284879286, "learning_rate": 8.202365629188634e-06, "loss": 0.6658, "step": 4693 }, { "epoch": 0.3, "grad_norm": 1.4115619017312748, "learning_rate": 8.201569505820315e-06, "loss": 0.7913, "step": 4694 }, { "epoch": 0.3, "grad_norm": 1.6043051753797306, "learning_rate": 8.200773244855267e-06, "loss": 0.7094, "step": 4695 }, { "epoch": 0.3, "grad_norm": 1.5294816079025166, "learning_rate": 8.199976846327711e-06, "loss": 0.711, "step": 4696 }, { "epoch": 0.3, "grad_norm": 1.5101963127992448, "learning_rate": 8.199180310271873e-06, "loss": 0.7075, "step": 4697 }, { "epoch": 0.3, "grad_norm": 1.5692397334252068, "learning_rate": 8.19838363672199e-06, "loss": 0.7394, "step": 4698 }, { "epoch": 0.3, "grad_norm": 1.83065520358022, "learning_rate": 8.197586825712295e-06, "loss": 0.6669, "step": 4699 }, { "epoch": 0.3, "grad_norm": 1.0844845424169585, "learning_rate": 8.19678987727704e-06, "loss": 0.6212, "step": 4700 }, { "epoch": 0.3, "grad_norm": 1.7190693029102315, "learning_rate": 8.195992791450475e-06, "loss": 0.7543, "step": 4701 }, { "epoch": 0.3, "grad_norm": 1.6295299282935316, "learning_rate": 8.195195568266853e-06, "loss": 0.6666, "step": 4702 }, { "epoch": 0.3, "grad_norm": 1.1238158942417193, "learning_rate": 8.19439820776044e-06, "loss": 0.6109, "step": 4703 }, { "epoch": 0.3, "grad_norm": 1.5956070429482696, "learning_rate": 8.193600709965504e-06, "loss": 0.7114, "step": 4704 }, { "epoch": 0.3, "grad_norm": 1.5748389458749694, "learning_rate": 8.19280307491632e-06, "loss": 0.6355, "step": 4705 }, { "epoch": 0.3, "grad_norm": 1.4243994873749457, "learning_rate": 8.19200530264717e-06, "loss": 0.6184, "step": 4706 }, { "epoch": 0.3, "grad_norm": 1.3736184839285916, "learning_rate": 8.19120739319234e-06, "loss": 0.5947, "step": 4707 }, { "epoch": 0.3, "grad_norm": 1.6736074373002459, "learning_rate": 8.19040934658612e-06, "loss": 0.6215, "step": 4708 }, { "epoch": 0.3, "grad_norm": 1.47177398402291, "learning_rate": 8.189611162862811e-06, "loss": 0.7324, "step": 4709 }, { "epoch": 0.3, "grad_norm": 1.5593275972098855, "learning_rate": 8.188812842056717e-06, "loss": 0.7337, "step": 4710 }, { "epoch": 0.3, "grad_norm": 1.4281976294395913, "learning_rate": 8.188014384202148e-06, "loss": 0.6797, "step": 4711 }, { "epoch": 0.3, "grad_norm": 1.6311891448975608, "learning_rate": 8.187215789333418e-06, "loss": 0.7667, "step": 4712 }, { "epoch": 0.3, "grad_norm": 1.3881356794515751, "learning_rate": 8.186417057484851e-06, "loss": 0.7378, "step": 4713 }, { "epoch": 0.3, "grad_norm": 1.7964402520869278, "learning_rate": 8.185618188690776e-06, "loss": 0.7227, "step": 4714 }, { "epoch": 0.3, "grad_norm": 1.5392048878990932, "learning_rate": 8.184819182985524e-06, "loss": 0.7276, "step": 4715 }, { "epoch": 0.3, "grad_norm": 1.7189278122788603, "learning_rate": 8.184020040403437e-06, "loss": 0.7299, "step": 4716 }, { "epoch": 0.3, "grad_norm": 1.2295021496383445, "learning_rate": 8.183220760978858e-06, "loss": 0.6133, "step": 4717 }, { "epoch": 0.3, "grad_norm": 1.5885374125288572, "learning_rate": 8.18242134474614e-06, "loss": 0.7838, "step": 4718 }, { "epoch": 0.3, "grad_norm": 1.443858292183129, "learning_rate": 8.18162179173964e-06, "loss": 0.6444, "step": 4719 }, { "epoch": 0.3, "grad_norm": 1.5051688001421408, "learning_rate": 8.180822101993719e-06, "loss": 0.717, "step": 4720 }, { "epoch": 0.3, "grad_norm": 1.650435977075695, "learning_rate": 8.18002227554275e-06, "loss": 0.7722, "step": 4721 }, { "epoch": 0.3, "grad_norm": 1.1259322564084073, "learning_rate": 8.179222312421104e-06, "loss": 0.7674, "step": 4722 }, { "epoch": 0.3, "grad_norm": 1.445893513759816, "learning_rate": 8.178422212663166e-06, "loss": 0.6931, "step": 4723 }, { "epoch": 0.3, "grad_norm": 1.5509398257298719, "learning_rate": 8.177621976303318e-06, "loss": 0.658, "step": 4724 }, { "epoch": 0.3, "grad_norm": 1.7977726580801285, "learning_rate": 8.176821603375955e-06, "loss": 0.7351, "step": 4725 }, { "epoch": 0.3, "grad_norm": 1.7687060956241425, "learning_rate": 8.176021093915476e-06, "loss": 0.7032, "step": 4726 }, { "epoch": 0.3, "grad_norm": 1.6261018003977086, "learning_rate": 8.175220447956282e-06, "loss": 0.6748, "step": 4727 }, { "epoch": 0.3, "grad_norm": 1.5171938121650241, "learning_rate": 8.174419665532787e-06, "loss": 0.6862, "step": 4728 }, { "epoch": 0.3, "grad_norm": 1.5860084036827515, "learning_rate": 8.173618746679406e-06, "loss": 0.7011, "step": 4729 }, { "epoch": 0.3, "grad_norm": 1.7484114697615774, "learning_rate": 8.172817691430556e-06, "loss": 0.7929, "step": 4730 }, { "epoch": 0.3, "grad_norm": 1.603387120861496, "learning_rate": 8.172016499820672e-06, "loss": 0.7272, "step": 4731 }, { "epoch": 0.3, "grad_norm": 1.5382655504516243, "learning_rate": 8.171215171884183e-06, "loss": 0.7773, "step": 4732 }, { "epoch": 0.3, "grad_norm": 1.8718890222378741, "learning_rate": 8.170413707655532e-06, "loss": 0.6582, "step": 4733 }, { "epoch": 0.3, "grad_norm": 1.4348594381587996, "learning_rate": 8.169612107169158e-06, "loss": 0.6536, "step": 4734 }, { "epoch": 0.3, "grad_norm": 1.793871820164169, "learning_rate": 8.168810370459519e-06, "loss": 0.6908, "step": 4735 }, { "epoch": 0.3, "grad_norm": 1.4816048585918262, "learning_rate": 8.168008497561066e-06, "loss": 0.6924, "step": 4736 }, { "epoch": 0.3, "grad_norm": 1.655743330201733, "learning_rate": 8.167206488508268e-06, "loss": 0.6733, "step": 4737 }, { "epoch": 0.3, "grad_norm": 1.7083135850004412, "learning_rate": 8.166404343335587e-06, "loss": 0.7377, "step": 4738 }, { "epoch": 0.3, "grad_norm": 1.5588398347667254, "learning_rate": 8.165602062077502e-06, "loss": 0.8381, "step": 4739 }, { "epoch": 0.3, "grad_norm": 1.966360429001188, "learning_rate": 8.164799644768494e-06, "loss": 0.6456, "step": 4740 }, { "epoch": 0.3, "grad_norm": 1.6222042283461857, "learning_rate": 8.163997091443046e-06, "loss": 0.6756, "step": 4741 }, { "epoch": 0.3, "grad_norm": 1.733658469698317, "learning_rate": 8.16319440213565e-06, "loss": 0.7309, "step": 4742 }, { "epoch": 0.3, "grad_norm": 1.5208884613985674, "learning_rate": 8.162391576880808e-06, "loss": 0.7242, "step": 4743 }, { "epoch": 0.3, "grad_norm": 1.3171078878294056, "learning_rate": 8.16158861571302e-06, "loss": 0.7382, "step": 4744 }, { "epoch": 0.3, "grad_norm": 1.6947917807810704, "learning_rate": 8.160785518666795e-06, "loss": 0.6757, "step": 4745 }, { "epoch": 0.3, "grad_norm": 1.7299927775358923, "learning_rate": 8.159982285776654e-06, "loss": 0.7092, "step": 4746 }, { "epoch": 0.3, "grad_norm": 1.6106892293574457, "learning_rate": 8.159178917077112e-06, "loss": 0.6205, "step": 4747 }, { "epoch": 0.3, "grad_norm": 1.523759294085645, "learning_rate": 8.158375412602698e-06, "loss": 0.662, "step": 4748 }, { "epoch": 0.3, "grad_norm": 1.5527450718559592, "learning_rate": 8.157571772387947e-06, "loss": 0.7084, "step": 4749 }, { "epoch": 0.3, "grad_norm": 1.6873099014317021, "learning_rate": 8.156767996467394e-06, "loss": 0.7674, "step": 4750 }, { "epoch": 0.3, "grad_norm": 1.8988104580680196, "learning_rate": 8.155964084875587e-06, "loss": 0.6826, "step": 4751 }, { "epoch": 0.3, "grad_norm": 1.0853335809208582, "learning_rate": 8.155160037647076e-06, "loss": 0.6852, "step": 4752 }, { "epoch": 0.3, "grad_norm": 1.426578714039487, "learning_rate": 8.154355854816416e-06, "loss": 0.6301, "step": 4753 }, { "epoch": 0.3, "grad_norm": 1.9189807246827926, "learning_rate": 8.15355153641817e-06, "loss": 0.6967, "step": 4754 }, { "epoch": 0.3, "grad_norm": 1.6872820639246933, "learning_rate": 8.152747082486905e-06, "loss": 0.6935, "step": 4755 }, { "epoch": 0.3, "grad_norm": 1.4706599829819154, "learning_rate": 8.151942493057195e-06, "loss": 0.7167, "step": 4756 }, { "epoch": 0.3, "grad_norm": 1.5891673750929391, "learning_rate": 8.15113776816362e-06, "loss": 0.6668, "step": 4757 }, { "epoch": 0.3, "grad_norm": 1.8550862860582324, "learning_rate": 8.150332907840765e-06, "loss": 0.7203, "step": 4758 }, { "epoch": 0.3, "grad_norm": 1.4441054650350655, "learning_rate": 8.14952791212322e-06, "loss": 0.6702, "step": 4759 }, { "epoch": 0.3, "grad_norm": 1.521688325021317, "learning_rate": 8.148722781045586e-06, "loss": 0.7603, "step": 4760 }, { "epoch": 0.3, "grad_norm": 1.513396191227618, "learning_rate": 8.147917514642462e-06, "loss": 0.725, "step": 4761 }, { "epoch": 0.3, "grad_norm": 1.4190556674657975, "learning_rate": 8.147112112948459e-06, "loss": 0.6539, "step": 4762 }, { "epoch": 0.3, "grad_norm": 1.4362061775945676, "learning_rate": 8.146306575998188e-06, "loss": 0.6773, "step": 4763 }, { "epoch": 0.3, "grad_norm": 1.5611947382990825, "learning_rate": 8.145500903826274e-06, "loss": 0.7142, "step": 4764 }, { "epoch": 0.3, "grad_norm": 1.9405557539475315, "learning_rate": 8.14469509646734e-06, "loss": 0.7084, "step": 4765 }, { "epoch": 0.31, "grad_norm": 2.0912779964371953, "learning_rate": 8.143889153956019e-06, "loss": 0.7617, "step": 4766 }, { "epoch": 0.31, "grad_norm": 1.496055186552933, "learning_rate": 8.143083076326947e-06, "loss": 0.6516, "step": 4767 }, { "epoch": 0.31, "grad_norm": 1.785591094161214, "learning_rate": 8.14227686361477e-06, "loss": 0.645, "step": 4768 }, { "epoch": 0.31, "grad_norm": 1.5942649189745077, "learning_rate": 8.141470515854137e-06, "loss": 0.7676, "step": 4769 }, { "epoch": 0.31, "grad_norm": 1.7314249802744277, "learning_rate": 8.1406640330797e-06, "loss": 0.7122, "step": 4770 }, { "epoch": 0.31, "grad_norm": 1.4866425086243833, "learning_rate": 8.139857415326125e-06, "loss": 0.6711, "step": 4771 }, { "epoch": 0.31, "grad_norm": 1.4148967069027518, "learning_rate": 8.139050662628074e-06, "loss": 0.6832, "step": 4772 }, { "epoch": 0.31, "grad_norm": 1.389790460170741, "learning_rate": 8.138243775020222e-06, "loss": 0.6644, "step": 4773 }, { "epoch": 0.31, "grad_norm": 1.6308735081166672, "learning_rate": 8.137436752537248e-06, "loss": 0.7713, "step": 4774 }, { "epoch": 0.31, "grad_norm": 1.8921904544724404, "learning_rate": 8.136629595213834e-06, "loss": 0.7796, "step": 4775 }, { "epoch": 0.31, "grad_norm": 1.4509955232824603, "learning_rate": 8.135822303084671e-06, "loss": 0.7478, "step": 4776 }, { "epoch": 0.31, "grad_norm": 1.567381792771335, "learning_rate": 8.135014876184454e-06, "loss": 0.6851, "step": 4777 }, { "epoch": 0.31, "grad_norm": 1.6357723563933866, "learning_rate": 8.134207314547887e-06, "loss": 0.7892, "step": 4778 }, { "epoch": 0.31, "grad_norm": 1.141242451676874, "learning_rate": 8.133399618209675e-06, "loss": 0.6072, "step": 4779 }, { "epoch": 0.31, "grad_norm": 1.6120919933095, "learning_rate": 8.132591787204531e-06, "loss": 0.7379, "step": 4780 }, { "epoch": 0.31, "grad_norm": 1.143040140277473, "learning_rate": 8.131783821567175e-06, "loss": 0.6317, "step": 4781 }, { "epoch": 0.31, "grad_norm": 1.6563970348368884, "learning_rate": 8.130975721332332e-06, "loss": 0.7958, "step": 4782 }, { "epoch": 0.31, "grad_norm": 1.8692631513306777, "learning_rate": 8.13016748653473e-06, "loss": 0.723, "step": 4783 }, { "epoch": 0.31, "grad_norm": 1.6275060458154378, "learning_rate": 8.129359117209107e-06, "loss": 0.7027, "step": 4784 }, { "epoch": 0.31, "grad_norm": 1.342269284670648, "learning_rate": 8.128550613390205e-06, "loss": 0.6291, "step": 4785 }, { "epoch": 0.31, "grad_norm": 1.4247510096226867, "learning_rate": 8.127741975112771e-06, "loss": 0.7242, "step": 4786 }, { "epoch": 0.31, "grad_norm": 1.2834462765354042, "learning_rate": 8.12693320241156e-06, "loss": 0.6033, "step": 4787 }, { "epoch": 0.31, "grad_norm": 1.1394943615611202, "learning_rate": 8.126124295321331e-06, "loss": 0.7282, "step": 4788 }, { "epoch": 0.31, "grad_norm": 1.5783247890504701, "learning_rate": 8.12531525387685e-06, "loss": 0.7981, "step": 4789 }, { "epoch": 0.31, "grad_norm": 2.0648941965563274, "learning_rate": 8.124506078112883e-06, "loss": 0.6363, "step": 4790 }, { "epoch": 0.31, "grad_norm": 1.64467009478431, "learning_rate": 8.123696768064212e-06, "loss": 0.7328, "step": 4791 }, { "epoch": 0.31, "grad_norm": 1.370156839082042, "learning_rate": 8.122887323765617e-06, "loss": 0.6616, "step": 4792 }, { "epoch": 0.31, "grad_norm": 1.528224514680268, "learning_rate": 8.122077745251888e-06, "loss": 0.7527, "step": 4793 }, { "epoch": 0.31, "grad_norm": 1.8444887989046874, "learning_rate": 8.12126803255782e-06, "loss": 0.7091, "step": 4794 }, { "epoch": 0.31, "grad_norm": 1.5050898739638539, "learning_rate": 8.120458185718206e-06, "loss": 0.7131, "step": 4795 }, { "epoch": 0.31, "grad_norm": 1.477670964247826, "learning_rate": 8.119648204767857e-06, "loss": 0.7026, "step": 4796 }, { "epoch": 0.31, "grad_norm": 1.608851567071603, "learning_rate": 8.118838089741585e-06, "loss": 0.668, "step": 4797 }, { "epoch": 0.31, "grad_norm": 1.3737780521054055, "learning_rate": 8.118027840674205e-06, "loss": 0.7105, "step": 4798 }, { "epoch": 0.31, "grad_norm": 1.6642536148183202, "learning_rate": 8.117217457600541e-06, "loss": 0.7186, "step": 4799 }, { "epoch": 0.31, "grad_norm": 1.4847339751434856, "learning_rate": 8.11640694055542e-06, "loss": 0.702, "step": 4800 }, { "epoch": 0.31, "grad_norm": 1.1944019179216439, "learning_rate": 8.11559628957368e-06, "loss": 0.6903, "step": 4801 }, { "epoch": 0.31, "grad_norm": 1.576125196931438, "learning_rate": 8.114785504690155e-06, "loss": 0.7237, "step": 4802 }, { "epoch": 0.31, "grad_norm": 2.00364316783663, "learning_rate": 8.113974585939694e-06, "loss": 0.7261, "step": 4803 }, { "epoch": 0.31, "grad_norm": 2.264290560362341, "learning_rate": 8.11316353335715e-06, "loss": 0.6985, "step": 4804 }, { "epoch": 0.31, "grad_norm": 1.5874358200665781, "learning_rate": 8.112352346977378e-06, "loss": 0.742, "step": 4805 }, { "epoch": 0.31, "grad_norm": 1.4771007509960812, "learning_rate": 8.111541026835243e-06, "loss": 0.6982, "step": 4806 }, { "epoch": 0.31, "grad_norm": 1.6482493349302891, "learning_rate": 8.110729572965613e-06, "loss": 0.7343, "step": 4807 }, { "epoch": 0.31, "grad_norm": 1.487593525103174, "learning_rate": 8.109917985403362e-06, "loss": 0.737, "step": 4808 }, { "epoch": 0.31, "grad_norm": 1.6778980760561997, "learning_rate": 8.109106264183369e-06, "loss": 0.6318, "step": 4809 }, { "epoch": 0.31, "grad_norm": 1.4120353257961025, "learning_rate": 8.108294409340525e-06, "loss": 0.6527, "step": 4810 }, { "epoch": 0.31, "grad_norm": 1.4706332704898726, "learning_rate": 8.107482420909719e-06, "loss": 0.7197, "step": 4811 }, { "epoch": 0.31, "grad_norm": 1.5728252436648544, "learning_rate": 8.106670298925845e-06, "loss": 0.6833, "step": 4812 }, { "epoch": 0.31, "grad_norm": 1.5558188382454652, "learning_rate": 8.105858043423811e-06, "loss": 0.6805, "step": 4813 }, { "epoch": 0.31, "grad_norm": 1.722313184471008, "learning_rate": 8.105045654438525e-06, "loss": 0.7237, "step": 4814 }, { "epoch": 0.31, "grad_norm": 1.4745239246999664, "learning_rate": 8.104233132004902e-06, "loss": 0.7373, "step": 4815 }, { "epoch": 0.31, "grad_norm": 1.4784435732487482, "learning_rate": 8.103420476157861e-06, "loss": 0.6963, "step": 4816 }, { "epoch": 0.31, "grad_norm": 1.049755130028667, "learning_rate": 8.10260768693233e-06, "loss": 0.7232, "step": 4817 }, { "epoch": 0.31, "grad_norm": 1.607321428991571, "learning_rate": 8.101794764363238e-06, "loss": 0.7217, "step": 4818 }, { "epoch": 0.31, "grad_norm": 1.1212282752743241, "learning_rate": 8.100981708485527e-06, "loss": 0.7539, "step": 4819 }, { "epoch": 0.31, "grad_norm": 1.5385174020705183, "learning_rate": 8.100168519334137e-06, "loss": 0.6798, "step": 4820 }, { "epoch": 0.31, "grad_norm": 1.6989163097078734, "learning_rate": 8.09935519694402e-06, "loss": 0.733, "step": 4821 }, { "epoch": 0.31, "grad_norm": 1.4948908324592791, "learning_rate": 8.098541741350126e-06, "loss": 0.7143, "step": 4822 }, { "epoch": 0.31, "grad_norm": 1.7846828890205146, "learning_rate": 8.09772815258742e-06, "loss": 0.7657, "step": 4823 }, { "epoch": 0.31, "grad_norm": 1.4905639596173355, "learning_rate": 8.096914430690868e-06, "loss": 0.6719, "step": 4824 }, { "epoch": 0.31, "grad_norm": 1.674155415605279, "learning_rate": 8.096100575695443e-06, "loss": 0.8097, "step": 4825 }, { "epoch": 0.31, "grad_norm": 1.54868054385681, "learning_rate": 8.09528658763612e-06, "loss": 0.8643, "step": 4826 }, { "epoch": 0.31, "grad_norm": 1.1388342143526264, "learning_rate": 8.094472466547882e-06, "loss": 0.7082, "step": 4827 }, { "epoch": 0.31, "grad_norm": 2.0955424494313415, "learning_rate": 8.09365821246572e-06, "loss": 0.7544, "step": 4828 }, { "epoch": 0.31, "grad_norm": 1.1827613013728153, "learning_rate": 8.09284382542463e-06, "loss": 0.6423, "step": 4829 }, { "epoch": 0.31, "grad_norm": 1.6575161588929597, "learning_rate": 8.092029305459612e-06, "loss": 0.7812, "step": 4830 }, { "epoch": 0.31, "grad_norm": 1.2102987987203175, "learning_rate": 8.09121465260567e-06, "loss": 0.6144, "step": 4831 }, { "epoch": 0.31, "grad_norm": 1.4736351214609185, "learning_rate": 8.090399866897818e-06, "loss": 0.6316, "step": 4832 }, { "epoch": 0.31, "grad_norm": 1.5975434325981728, "learning_rate": 8.089584948371074e-06, "loss": 0.8315, "step": 4833 }, { "epoch": 0.31, "grad_norm": 2.1810793576274423, "learning_rate": 8.088769897060461e-06, "loss": 0.7358, "step": 4834 }, { "epoch": 0.31, "grad_norm": 1.1147151742647867, "learning_rate": 8.087954713001007e-06, "loss": 0.6413, "step": 4835 }, { "epoch": 0.31, "grad_norm": 1.264216197698747, "learning_rate": 8.08713939622775e-06, "loss": 0.6114, "step": 4836 }, { "epoch": 0.31, "grad_norm": 1.5118201682717531, "learning_rate": 8.086323946775727e-06, "loss": 0.6364, "step": 4837 }, { "epoch": 0.31, "grad_norm": 1.5343149333975807, "learning_rate": 8.085508364679989e-06, "loss": 0.7278, "step": 4838 }, { "epoch": 0.31, "grad_norm": 1.6043978487865924, "learning_rate": 8.084692649975583e-06, "loss": 0.7429, "step": 4839 }, { "epoch": 0.31, "grad_norm": 1.2028218381669509, "learning_rate": 8.083876802697567e-06, "loss": 0.6448, "step": 4840 }, { "epoch": 0.31, "grad_norm": 1.5317387331606775, "learning_rate": 8.083060822881008e-06, "loss": 0.6842, "step": 4841 }, { "epoch": 0.31, "grad_norm": 1.5682261687670351, "learning_rate": 8.082244710560973e-06, "loss": 0.7464, "step": 4842 }, { "epoch": 0.31, "grad_norm": 1.499106398634914, "learning_rate": 8.081428465772539e-06, "loss": 0.6608, "step": 4843 }, { "epoch": 0.31, "grad_norm": 1.5348941044490456, "learning_rate": 8.080612088550782e-06, "loss": 0.6225, "step": 4844 }, { "epoch": 0.31, "grad_norm": 1.552589105381648, "learning_rate": 8.079795578930792e-06, "loss": 0.6758, "step": 4845 }, { "epoch": 0.31, "grad_norm": 1.478382448853864, "learning_rate": 8.07897893694766e-06, "loss": 0.7248, "step": 4846 }, { "epoch": 0.31, "grad_norm": 1.040132940673683, "learning_rate": 8.07816216263648e-06, "loss": 0.6946, "step": 4847 }, { "epoch": 0.31, "grad_norm": 1.6673586928296134, "learning_rate": 8.07734525603236e-06, "loss": 0.7882, "step": 4848 }, { "epoch": 0.31, "grad_norm": 3.444342781932193, "learning_rate": 8.076528217170408e-06, "loss": 0.666, "step": 4849 }, { "epoch": 0.31, "grad_norm": 1.5181008043924449, "learning_rate": 8.075711046085738e-06, "loss": 0.7119, "step": 4850 }, { "epoch": 0.31, "grad_norm": 1.477168839778125, "learning_rate": 8.07489374281347e-06, "loss": 0.6818, "step": 4851 }, { "epoch": 0.31, "grad_norm": 1.5497077024999661, "learning_rate": 8.07407630738873e-06, "loss": 0.6832, "step": 4852 }, { "epoch": 0.31, "grad_norm": 1.4879869696667074, "learning_rate": 8.07325873984665e-06, "loss": 0.6528, "step": 4853 }, { "epoch": 0.31, "grad_norm": 1.800363352179792, "learning_rate": 8.072441040222367e-06, "loss": 0.7239, "step": 4854 }, { "epoch": 0.31, "grad_norm": 1.6923614471030426, "learning_rate": 8.071623208551023e-06, "loss": 0.7438, "step": 4855 }, { "epoch": 0.31, "grad_norm": 1.0319405259453445, "learning_rate": 8.07080524486777e-06, "loss": 0.7209, "step": 4856 }, { "epoch": 0.31, "grad_norm": 1.6955856753361773, "learning_rate": 8.069987149207759e-06, "loss": 0.7052, "step": 4857 }, { "epoch": 0.31, "grad_norm": 1.4302531012625734, "learning_rate": 8.069168921606151e-06, "loss": 0.742, "step": 4858 }, { "epoch": 0.31, "grad_norm": 1.6696893377226945, "learning_rate": 8.068350562098113e-06, "loss": 0.7117, "step": 4859 }, { "epoch": 0.31, "grad_norm": 1.538700593113781, "learning_rate": 8.067532070718814e-06, "loss": 0.6827, "step": 4860 }, { "epoch": 0.31, "grad_norm": 1.9977169796474186, "learning_rate": 8.066713447503434e-06, "loss": 0.6692, "step": 4861 }, { "epoch": 0.31, "grad_norm": 1.4344513829544006, "learning_rate": 8.065894692487153e-06, "loss": 0.6261, "step": 4862 }, { "epoch": 0.31, "grad_norm": 1.7606869238490788, "learning_rate": 8.065075805705161e-06, "loss": 0.6973, "step": 4863 }, { "epoch": 0.31, "grad_norm": 1.8229247719941355, "learning_rate": 8.064256787192651e-06, "loss": 0.7866, "step": 4864 }, { "epoch": 0.31, "grad_norm": 1.2828072318986647, "learning_rate": 8.063437636984824e-06, "loss": 0.6555, "step": 4865 }, { "epoch": 0.31, "grad_norm": 1.1737609081347158, "learning_rate": 8.062618355116883e-06, "loss": 0.7012, "step": 4866 }, { "epoch": 0.31, "grad_norm": 1.8259054550425233, "learning_rate": 8.061798941624041e-06, "loss": 0.7346, "step": 4867 }, { "epoch": 0.31, "grad_norm": 1.5257807713781486, "learning_rate": 8.060979396541516e-06, "loss": 0.7734, "step": 4868 }, { "epoch": 0.31, "grad_norm": 1.8795412731130776, "learning_rate": 8.060159719904526e-06, "loss": 0.6493, "step": 4869 }, { "epoch": 0.31, "grad_norm": 1.5418975653000422, "learning_rate": 8.059339911748303e-06, "loss": 0.7188, "step": 4870 }, { "epoch": 0.31, "grad_norm": 1.544651778961415, "learning_rate": 8.058519972108078e-06, "loss": 0.801, "step": 4871 }, { "epoch": 0.31, "grad_norm": 1.669696862565848, "learning_rate": 8.057699901019093e-06, "loss": 0.7744, "step": 4872 }, { "epoch": 0.31, "grad_norm": 1.5205460168744045, "learning_rate": 8.05687969851659e-06, "loss": 0.8364, "step": 4873 }, { "epoch": 0.31, "grad_norm": 1.5554800239855184, "learning_rate": 8.056059364635822e-06, "loss": 0.6865, "step": 4874 }, { "epoch": 0.31, "grad_norm": 1.5332533598278875, "learning_rate": 8.055238899412046e-06, "loss": 0.7951, "step": 4875 }, { "epoch": 0.31, "grad_norm": 1.5953801162020382, "learning_rate": 8.05441830288052e-06, "loss": 0.6847, "step": 4876 }, { "epoch": 0.31, "grad_norm": 1.6033913843687893, "learning_rate": 8.053597575076513e-06, "loss": 0.6816, "step": 4877 }, { "epoch": 0.31, "grad_norm": 1.4460113730296884, "learning_rate": 8.052776716035298e-06, "loss": 0.6413, "step": 4878 }, { "epoch": 0.31, "grad_norm": 1.5578602660919332, "learning_rate": 8.051955725792155e-06, "loss": 0.677, "step": 4879 }, { "epoch": 0.31, "grad_norm": 1.6560432968212018, "learning_rate": 8.05113460438237e-06, "loss": 0.682, "step": 4880 }, { "epoch": 0.31, "grad_norm": 1.4585995126997582, "learning_rate": 8.050313351841229e-06, "loss": 0.7204, "step": 4881 }, { "epoch": 0.31, "grad_norm": 1.7932318032369068, "learning_rate": 8.049491968204031e-06, "loss": 0.7852, "step": 4882 }, { "epoch": 0.31, "grad_norm": 1.75617854274983, "learning_rate": 8.048670453506074e-06, "loss": 0.6828, "step": 4883 }, { "epoch": 0.31, "grad_norm": 1.136488050768744, "learning_rate": 8.04784880778267e-06, "loss": 0.6986, "step": 4884 }, { "epoch": 0.31, "grad_norm": 1.0498979292159039, "learning_rate": 8.047027031069126e-06, "loss": 0.7428, "step": 4885 }, { "epoch": 0.31, "grad_norm": 1.4581175928053212, "learning_rate": 8.046205123400764e-06, "loss": 0.6887, "step": 4886 }, { "epoch": 0.31, "grad_norm": 1.587162225294359, "learning_rate": 8.045383084812907e-06, "loss": 0.6629, "step": 4887 }, { "epoch": 0.31, "grad_norm": 1.7764323933242838, "learning_rate": 8.044560915340884e-06, "loss": 0.7191, "step": 4888 }, { "epoch": 0.31, "grad_norm": 1.662513683109073, "learning_rate": 8.04373861502003e-06, "loss": 0.6552, "step": 4889 }, { "epoch": 0.31, "grad_norm": 1.1473131008094808, "learning_rate": 8.042916183885687e-06, "loss": 0.6387, "step": 4890 }, { "epoch": 0.31, "grad_norm": 1.615326652903012, "learning_rate": 8.0420936219732e-06, "loss": 0.6484, "step": 4891 }, { "epoch": 0.31, "grad_norm": 1.4220799598986213, "learning_rate": 8.04127092931792e-06, "loss": 0.6311, "step": 4892 }, { "epoch": 0.31, "grad_norm": 1.5999933194669855, "learning_rate": 8.040448105955209e-06, "loss": 0.6743, "step": 4893 }, { "epoch": 0.31, "grad_norm": 1.5545536222613277, "learning_rate": 8.039625151920424e-06, "loss": 0.7631, "step": 4894 }, { "epoch": 0.31, "grad_norm": 1.4707255141649533, "learning_rate": 8.03880206724894e-06, "loss": 0.6881, "step": 4895 }, { "epoch": 0.31, "grad_norm": 1.7413567010265112, "learning_rate": 8.037978851976126e-06, "loss": 0.8001, "step": 4896 }, { "epoch": 0.31, "grad_norm": 1.519263322775187, "learning_rate": 8.037155506137367e-06, "loss": 0.7795, "step": 4897 }, { "epoch": 0.31, "grad_norm": 1.454224185759515, "learning_rate": 8.036332029768045e-06, "loss": 0.7118, "step": 4898 }, { "epoch": 0.31, "grad_norm": 1.70696342377999, "learning_rate": 8.035508422903554e-06, "loss": 0.7073, "step": 4899 }, { "epoch": 0.31, "grad_norm": 1.5315873365969628, "learning_rate": 8.034684685579288e-06, "loss": 0.7558, "step": 4900 }, { "epoch": 0.31, "grad_norm": 1.4344312852723486, "learning_rate": 8.033860817830651e-06, "loss": 0.6766, "step": 4901 }, { "epoch": 0.31, "grad_norm": 1.149884161998469, "learning_rate": 8.03303681969305e-06, "loss": 0.7318, "step": 4902 }, { "epoch": 0.31, "grad_norm": 1.742698594020629, "learning_rate": 8.0322126912019e-06, "loss": 0.8078, "step": 4903 }, { "epoch": 0.31, "grad_norm": 1.6996769069438797, "learning_rate": 8.031388432392625e-06, "loss": 0.7229, "step": 4904 }, { "epoch": 0.31, "grad_norm": 1.4976252203715905, "learning_rate": 8.03056404330064e-06, "loss": 0.6993, "step": 4905 }, { "epoch": 0.31, "grad_norm": 1.1337253250302535, "learning_rate": 8.029739523961381e-06, "loss": 0.6629, "step": 4906 }, { "epoch": 0.31, "grad_norm": 1.9053375272161537, "learning_rate": 8.028914874410284e-06, "loss": 0.7214, "step": 4907 }, { "epoch": 0.31, "grad_norm": 1.6441738949146778, "learning_rate": 8.02809009468279e-06, "loss": 0.6632, "step": 4908 }, { "epoch": 0.31, "grad_norm": 1.6004140703467071, "learning_rate": 8.027265184814349e-06, "loss": 0.6959, "step": 4909 }, { "epoch": 0.31, "grad_norm": 1.5441385305800766, "learning_rate": 8.026440144840409e-06, "loss": 0.6649, "step": 4910 }, { "epoch": 0.31, "grad_norm": 1.523663241091393, "learning_rate": 8.025614974796432e-06, "loss": 0.7569, "step": 4911 }, { "epoch": 0.31, "grad_norm": 1.3824170219056007, "learning_rate": 8.024789674717882e-06, "loss": 0.6136, "step": 4912 }, { "epoch": 0.31, "grad_norm": 1.5645431020519633, "learning_rate": 8.023964244640225e-06, "loss": 0.6495, "step": 4913 }, { "epoch": 0.31, "grad_norm": 3.1088327747315074, "learning_rate": 8.023138684598942e-06, "loss": 0.7742, "step": 4914 }, { "epoch": 0.31, "grad_norm": 1.4550556407873416, "learning_rate": 8.022312994629508e-06, "loss": 0.7865, "step": 4915 }, { "epoch": 0.31, "grad_norm": 1.1943631619671038, "learning_rate": 8.021487174767414e-06, "loss": 0.5576, "step": 4916 }, { "epoch": 0.31, "grad_norm": 0.9927301384754507, "learning_rate": 8.020661225048149e-06, "loss": 0.6716, "step": 4917 }, { "epoch": 0.31, "grad_norm": 1.7080524319851003, "learning_rate": 8.019835145507212e-06, "loss": 0.6959, "step": 4918 }, { "epoch": 0.31, "grad_norm": 1.5799015284563946, "learning_rate": 8.019008936180108e-06, "loss": 0.7516, "step": 4919 }, { "epoch": 0.31, "grad_norm": 1.3999903611271556, "learning_rate": 8.018182597102344e-06, "loss": 0.637, "step": 4920 }, { "epoch": 0.31, "grad_norm": 1.510733039220077, "learning_rate": 8.017356128309432e-06, "loss": 0.6178, "step": 4921 }, { "epoch": 0.32, "grad_norm": 1.5047941252368604, "learning_rate": 8.016529529836894e-06, "loss": 0.6454, "step": 4922 }, { "epoch": 0.32, "grad_norm": 1.6542497255603237, "learning_rate": 8.015702801720255e-06, "loss": 0.7795, "step": 4923 }, { "epoch": 0.32, "grad_norm": 1.459810620313538, "learning_rate": 8.014875943995048e-06, "loss": 0.6641, "step": 4924 }, { "epoch": 0.32, "grad_norm": 1.9272918529541503, "learning_rate": 8.014048956696807e-06, "loss": 0.6772, "step": 4925 }, { "epoch": 0.32, "grad_norm": 2.4562488155830944, "learning_rate": 8.013221839861076e-06, "loss": 0.6485, "step": 4926 }, { "epoch": 0.32, "grad_norm": 1.6235244543004523, "learning_rate": 8.012394593523403e-06, "loss": 0.6793, "step": 4927 }, { "epoch": 0.32, "grad_norm": 1.5432420364329666, "learning_rate": 8.011567217719339e-06, "loss": 0.8202, "step": 4928 }, { "epoch": 0.32, "grad_norm": 1.4195801603374338, "learning_rate": 8.010739712484443e-06, "loss": 0.6869, "step": 4929 }, { "epoch": 0.32, "grad_norm": 1.4123515708615753, "learning_rate": 8.009912077854282e-06, "loss": 0.6592, "step": 4930 }, { "epoch": 0.32, "grad_norm": 1.4835731131863323, "learning_rate": 8.009084313864424e-06, "loss": 0.6608, "step": 4931 }, { "epoch": 0.32, "grad_norm": 1.6707933416921303, "learning_rate": 8.008256420550446e-06, "loss": 0.7368, "step": 4932 }, { "epoch": 0.32, "grad_norm": 1.360538904454045, "learning_rate": 8.007428397947926e-06, "loss": 0.691, "step": 4933 }, { "epoch": 0.32, "grad_norm": 1.4618957443266285, "learning_rate": 8.006600246092455e-06, "loss": 0.7638, "step": 4934 }, { "epoch": 0.32, "grad_norm": 1.4052630537901065, "learning_rate": 8.005771965019622e-06, "loss": 0.7728, "step": 4935 }, { "epoch": 0.32, "grad_norm": 1.9671527570454836, "learning_rate": 8.004943554765028e-06, "loss": 0.7772, "step": 4936 }, { "epoch": 0.32, "grad_norm": 1.3852677623528022, "learning_rate": 8.004115015364273e-06, "loss": 0.7189, "step": 4937 }, { "epoch": 0.32, "grad_norm": 1.705802043911852, "learning_rate": 8.003286346852967e-06, "loss": 0.8423, "step": 4938 }, { "epoch": 0.32, "grad_norm": 1.4176252339564885, "learning_rate": 8.002457549266725e-06, "loss": 0.6975, "step": 4939 }, { "epoch": 0.32, "grad_norm": 1.9762730097455121, "learning_rate": 8.001628622641166e-06, "loss": 0.6687, "step": 4940 }, { "epoch": 0.32, "grad_norm": 2.273355603261401, "learning_rate": 8.000799567011916e-06, "loss": 0.7526, "step": 4941 }, { "epoch": 0.32, "grad_norm": 1.6213158751956263, "learning_rate": 7.999970382414606e-06, "loss": 0.7853, "step": 4942 }, { "epoch": 0.32, "grad_norm": 1.6893272881000998, "learning_rate": 7.999141068884873e-06, "loss": 0.678, "step": 4943 }, { "epoch": 0.32, "grad_norm": 1.553958125689591, "learning_rate": 7.998311626458359e-06, "loss": 0.7897, "step": 4944 }, { "epoch": 0.32, "grad_norm": 1.4819131546214463, "learning_rate": 7.997482055170712e-06, "loss": 0.6941, "step": 4945 }, { "epoch": 0.32, "grad_norm": 1.4935681487751384, "learning_rate": 7.996652355057585e-06, "loss": 0.7602, "step": 4946 }, { "epoch": 0.32, "grad_norm": 1.9057102160518244, "learning_rate": 7.995822526154636e-06, "loss": 0.6297, "step": 4947 }, { "epoch": 0.32, "grad_norm": 1.63836814870428, "learning_rate": 7.99499256849753e-06, "loss": 0.7006, "step": 4948 }, { "epoch": 0.32, "grad_norm": 1.4478713316388312, "learning_rate": 7.994162482121936e-06, "loss": 0.686, "step": 4949 }, { "epoch": 0.32, "grad_norm": 1.493606365671587, "learning_rate": 7.993332267063533e-06, "loss": 0.7102, "step": 4950 }, { "epoch": 0.32, "grad_norm": 1.3168255386543852, "learning_rate": 7.992501923357996e-06, "loss": 0.6093, "step": 4951 }, { "epoch": 0.32, "grad_norm": 1.919609488251687, "learning_rate": 7.991671451041017e-06, "loss": 0.7131, "step": 4952 }, { "epoch": 0.32, "grad_norm": 1.6885413339732012, "learning_rate": 7.990840850148283e-06, "loss": 0.6249, "step": 4953 }, { "epoch": 0.32, "grad_norm": 1.1427226931303742, "learning_rate": 7.990010120715494e-06, "loss": 0.7202, "step": 4954 }, { "epoch": 0.32, "grad_norm": 1.2573471382241264, "learning_rate": 7.989179262778356e-06, "loss": 0.6323, "step": 4955 }, { "epoch": 0.32, "grad_norm": 1.775885995098424, "learning_rate": 7.98834827637257e-06, "loss": 0.6717, "step": 4956 }, { "epoch": 0.32, "grad_norm": 1.4953682921655498, "learning_rate": 7.987517161533858e-06, "loss": 0.747, "step": 4957 }, { "epoch": 0.32, "grad_norm": 7.282476345409538, "learning_rate": 7.986685918297934e-06, "loss": 0.6522, "step": 4958 }, { "epoch": 0.32, "grad_norm": 1.6750927685834383, "learning_rate": 7.985854546700526e-06, "loss": 0.7682, "step": 4959 }, { "epoch": 0.32, "grad_norm": 1.539544547746419, "learning_rate": 7.985023046777363e-06, "loss": 0.6284, "step": 4960 }, { "epoch": 0.32, "grad_norm": 1.5267429848906862, "learning_rate": 7.984191418564183e-06, "loss": 0.7224, "step": 4961 }, { "epoch": 0.32, "grad_norm": 1.9349781040313536, "learning_rate": 7.983359662096725e-06, "loss": 0.7877, "step": 4962 }, { "epoch": 0.32, "grad_norm": 1.3286720225575595, "learning_rate": 7.982527777410738e-06, "loss": 0.6659, "step": 4963 }, { "epoch": 0.32, "grad_norm": 1.5888915592447481, "learning_rate": 7.981695764541975e-06, "loss": 0.7385, "step": 4964 }, { "epoch": 0.32, "grad_norm": 1.5692139635993445, "learning_rate": 7.980863623526195e-06, "loss": 0.6567, "step": 4965 }, { "epoch": 0.32, "grad_norm": 1.3173079808356378, "learning_rate": 7.980031354399156e-06, "loss": 0.6584, "step": 4966 }, { "epoch": 0.32, "grad_norm": 1.752491764611566, "learning_rate": 7.979198957196634e-06, "loss": 0.7966, "step": 4967 }, { "epoch": 0.32, "grad_norm": 3.357309213352949, "learning_rate": 7.9783664319544e-06, "loss": 0.6432, "step": 4968 }, { "epoch": 0.32, "grad_norm": 1.598342853560747, "learning_rate": 7.977533778708237e-06, "loss": 0.7953, "step": 4969 }, { "epoch": 0.32, "grad_norm": 1.465987282308313, "learning_rate": 7.976700997493926e-06, "loss": 0.7163, "step": 4970 }, { "epoch": 0.32, "grad_norm": 1.4510643797039602, "learning_rate": 7.975868088347263e-06, "loss": 0.7539, "step": 4971 }, { "epoch": 0.32, "grad_norm": 1.5828677353197267, "learning_rate": 7.975035051304042e-06, "loss": 0.6981, "step": 4972 }, { "epoch": 0.32, "grad_norm": 1.6107931244067393, "learning_rate": 7.974201886400066e-06, "loss": 0.7504, "step": 4973 }, { "epoch": 0.32, "grad_norm": 1.135590508451017, "learning_rate": 7.973368593671142e-06, "loss": 0.6169, "step": 4974 }, { "epoch": 0.32, "grad_norm": 1.625344297993955, "learning_rate": 7.972535173153087e-06, "loss": 0.7211, "step": 4975 }, { "epoch": 0.32, "grad_norm": 1.5414417667574016, "learning_rate": 7.971701624881714e-06, "loss": 0.7736, "step": 4976 }, { "epoch": 0.32, "grad_norm": 1.550865336618524, "learning_rate": 7.970867948892849e-06, "loss": 0.6484, "step": 4977 }, { "epoch": 0.32, "grad_norm": 1.6071307109052577, "learning_rate": 7.970034145222323e-06, "loss": 0.7216, "step": 4978 }, { "epoch": 0.32, "grad_norm": 1.3964323083100363, "learning_rate": 7.96920021390597e-06, "loss": 0.7121, "step": 4979 }, { "epoch": 0.32, "grad_norm": 1.6012247790535474, "learning_rate": 7.96836615497963e-06, "loss": 0.8001, "step": 4980 }, { "epoch": 0.32, "grad_norm": 1.6504798626187083, "learning_rate": 7.967531968479152e-06, "loss": 0.6739, "step": 4981 }, { "epoch": 0.32, "grad_norm": 1.4358261198546471, "learning_rate": 7.966697654440384e-06, "loss": 0.7146, "step": 4982 }, { "epoch": 0.32, "grad_norm": 1.746930557313374, "learning_rate": 7.965863212899185e-06, "loss": 0.7764, "step": 4983 }, { "epoch": 0.32, "grad_norm": 1.6639207882012552, "learning_rate": 7.96502864389142e-06, "loss": 0.6657, "step": 4984 }, { "epoch": 0.32, "grad_norm": 1.6001766854916302, "learning_rate": 7.96419394745295e-06, "loss": 0.6868, "step": 4985 }, { "epoch": 0.32, "grad_norm": 1.6705048787996823, "learning_rate": 7.963359123619654e-06, "loss": 0.7575, "step": 4986 }, { "epoch": 0.32, "grad_norm": 2.5578835963808784, "learning_rate": 7.962524172427412e-06, "loss": 0.8111, "step": 4987 }, { "epoch": 0.32, "grad_norm": 1.3162473247433055, "learning_rate": 7.961689093912103e-06, "loss": 0.7092, "step": 4988 }, { "epoch": 0.32, "grad_norm": 1.5136543239548874, "learning_rate": 7.960853888109624e-06, "loss": 0.6743, "step": 4989 }, { "epoch": 0.32, "grad_norm": 1.7414140839683045, "learning_rate": 7.960018555055863e-06, "loss": 0.7986, "step": 4990 }, { "epoch": 0.32, "grad_norm": 1.5263759968761963, "learning_rate": 7.959183094786726e-06, "loss": 0.76, "step": 4991 }, { "epoch": 0.32, "grad_norm": 1.471125892014486, "learning_rate": 7.958347507338117e-06, "loss": 0.7479, "step": 4992 }, { "epoch": 0.32, "grad_norm": 1.27597804870072, "learning_rate": 7.957511792745948e-06, "loss": 0.6139, "step": 4993 }, { "epoch": 0.32, "grad_norm": 1.8883474686940345, "learning_rate": 7.956675951046138e-06, "loss": 0.6753, "step": 4994 }, { "epoch": 0.32, "grad_norm": 1.6497391795696614, "learning_rate": 7.955839982274608e-06, "loss": 0.7764, "step": 4995 }, { "epoch": 0.32, "grad_norm": 1.575238804117042, "learning_rate": 7.955003886467287e-06, "loss": 0.81, "step": 4996 }, { "epoch": 0.32, "grad_norm": 1.236855683696091, "learning_rate": 7.954167663660108e-06, "loss": 0.723, "step": 4997 }, { "epoch": 0.32, "grad_norm": 1.2961675024883899, "learning_rate": 7.95333131388901e-06, "loss": 0.6275, "step": 4998 }, { "epoch": 0.32, "grad_norm": 0.9385786333646725, "learning_rate": 7.952494837189937e-06, "loss": 0.6153, "step": 4999 }, { "epoch": 0.32, "grad_norm": 1.1405545043831458, "learning_rate": 7.951658233598843e-06, "loss": 0.6969, "step": 5000 }, { "epoch": 0.32, "grad_norm": 1.574485506403577, "learning_rate": 7.950821503151678e-06, "loss": 0.6669, "step": 5001 }, { "epoch": 0.32, "grad_norm": 1.7822738253510673, "learning_rate": 7.949984645884406e-06, "loss": 0.7072, "step": 5002 }, { "epoch": 0.32, "grad_norm": 2.281515168390189, "learning_rate": 7.949147661832992e-06, "loss": 0.657, "step": 5003 }, { "epoch": 0.32, "grad_norm": 1.5279629420007814, "learning_rate": 7.948310551033408e-06, "loss": 0.6865, "step": 5004 }, { "epoch": 0.32, "grad_norm": 1.4412007427423752, "learning_rate": 7.947473313521635e-06, "loss": 0.6891, "step": 5005 }, { "epoch": 0.32, "grad_norm": 1.6205272760903684, "learning_rate": 7.94663594933365e-06, "loss": 0.6519, "step": 5006 }, { "epoch": 0.32, "grad_norm": 1.4624432346556635, "learning_rate": 7.945798458505442e-06, "loss": 0.7084, "step": 5007 }, { "epoch": 0.32, "grad_norm": 1.516029729288511, "learning_rate": 7.94496084107301e-06, "loss": 0.6714, "step": 5008 }, { "epoch": 0.32, "grad_norm": 1.60362752797396, "learning_rate": 7.944123097072348e-06, "loss": 0.7635, "step": 5009 }, { "epoch": 0.32, "grad_norm": 1.5368861488696801, "learning_rate": 7.94328522653946e-06, "loss": 0.6513, "step": 5010 }, { "epoch": 0.32, "grad_norm": 1.2139901391517773, "learning_rate": 7.942447229510359e-06, "loss": 0.6285, "step": 5011 }, { "epoch": 0.32, "grad_norm": 1.4445003162487355, "learning_rate": 7.941609106021059e-06, "loss": 0.7347, "step": 5012 }, { "epoch": 0.32, "grad_norm": 1.6645431932085812, "learning_rate": 7.94077085610758e-06, "loss": 0.6874, "step": 5013 }, { "epoch": 0.32, "grad_norm": 1.4156630220215298, "learning_rate": 7.93993247980595e-06, "loss": 0.6821, "step": 5014 }, { "epoch": 0.32, "grad_norm": 2.033225295493403, "learning_rate": 7.9390939771522e-06, "loss": 0.7127, "step": 5015 }, { "epoch": 0.32, "grad_norm": 1.6712438812951353, "learning_rate": 7.938255348182366e-06, "loss": 0.7226, "step": 5016 }, { "epoch": 0.32, "grad_norm": 1.1893954255690116, "learning_rate": 7.93741659293249e-06, "loss": 0.6766, "step": 5017 }, { "epoch": 0.32, "grad_norm": 1.050994229297584, "learning_rate": 7.936577711438624e-06, "loss": 0.6312, "step": 5018 }, { "epoch": 0.32, "grad_norm": 1.6866115108607904, "learning_rate": 7.935738703736817e-06, "loss": 0.7957, "step": 5019 }, { "epoch": 0.32, "grad_norm": 1.6258214376646332, "learning_rate": 7.93489956986313e-06, "loss": 0.5999, "step": 5020 }, { "epoch": 0.32, "grad_norm": 1.5942241231572871, "learning_rate": 7.934060309853627e-06, "loss": 0.7552, "step": 5021 }, { "epoch": 0.32, "grad_norm": 2.2819795886846883, "learning_rate": 7.933220923744375e-06, "loss": 0.7434, "step": 5022 }, { "epoch": 0.32, "grad_norm": 2.324229323999429, "learning_rate": 7.932381411571453e-06, "loss": 0.75, "step": 5023 }, { "epoch": 0.32, "grad_norm": 1.4231987153604218, "learning_rate": 7.93154177337094e-06, "loss": 0.7379, "step": 5024 }, { "epoch": 0.32, "grad_norm": 1.488846129584695, "learning_rate": 7.930702009178919e-06, "loss": 0.6624, "step": 5025 }, { "epoch": 0.32, "grad_norm": 1.5226478540076542, "learning_rate": 7.929862119031486e-06, "loss": 0.7945, "step": 5026 }, { "epoch": 0.32, "grad_norm": 1.580841029403284, "learning_rate": 7.929022102964736e-06, "loss": 0.737, "step": 5027 }, { "epoch": 0.32, "grad_norm": 1.3490212863152178, "learning_rate": 7.928181961014769e-06, "loss": 0.6124, "step": 5028 }, { "epoch": 0.32, "grad_norm": 1.5675906023847486, "learning_rate": 7.927341693217697e-06, "loss": 0.6136, "step": 5029 }, { "epoch": 0.32, "grad_norm": 1.5978301355875957, "learning_rate": 7.926501299609627e-06, "loss": 0.7388, "step": 5030 }, { "epoch": 0.32, "grad_norm": 1.4862814526508326, "learning_rate": 7.925660780226683e-06, "loss": 0.6893, "step": 5031 }, { "epoch": 0.32, "grad_norm": 1.4668183515995727, "learning_rate": 7.924820135104984e-06, "loss": 0.6786, "step": 5032 }, { "epoch": 0.32, "grad_norm": 1.4243941860480875, "learning_rate": 7.923979364280664e-06, "loss": 0.713, "step": 5033 }, { "epoch": 0.32, "grad_norm": 1.5746614933505316, "learning_rate": 7.923138467789853e-06, "loss": 0.6489, "step": 5034 }, { "epoch": 0.32, "grad_norm": 1.336742109003386, "learning_rate": 7.922297445668695e-06, "loss": 0.6579, "step": 5035 }, { "epoch": 0.32, "grad_norm": 1.714563144511361, "learning_rate": 7.921456297953331e-06, "loss": 0.6958, "step": 5036 }, { "epoch": 0.32, "grad_norm": 1.5722840244605245, "learning_rate": 7.920615024679917e-06, "loss": 0.6534, "step": 5037 }, { "epoch": 0.32, "grad_norm": 1.6069947797648878, "learning_rate": 7.919773625884604e-06, "loss": 0.7617, "step": 5038 }, { "epoch": 0.32, "grad_norm": 1.621820088049398, "learning_rate": 7.918932101603556e-06, "loss": 0.7076, "step": 5039 }, { "epoch": 0.32, "grad_norm": 1.1377480065958676, "learning_rate": 7.918090451872942e-06, "loss": 0.7231, "step": 5040 }, { "epoch": 0.32, "grad_norm": 1.712115450874208, "learning_rate": 7.917248676728929e-06, "loss": 0.7755, "step": 5041 }, { "epoch": 0.32, "grad_norm": 1.2196266190783505, "learning_rate": 7.9164067762077e-06, "loss": 0.5908, "step": 5042 }, { "epoch": 0.32, "grad_norm": 1.5486131082630061, "learning_rate": 7.915564750345436e-06, "loss": 0.733, "step": 5043 }, { "epoch": 0.32, "grad_norm": 1.591288372006591, "learning_rate": 7.914722599178324e-06, "loss": 0.6428, "step": 5044 }, { "epoch": 0.32, "grad_norm": 1.373143228373643, "learning_rate": 7.91388032274256e-06, "loss": 0.6696, "step": 5045 }, { "epoch": 0.32, "grad_norm": 1.6619492957898352, "learning_rate": 7.913037921074342e-06, "loss": 0.6717, "step": 5046 }, { "epoch": 0.32, "grad_norm": 1.5109356724463514, "learning_rate": 7.912195394209877e-06, "loss": 0.7746, "step": 5047 }, { "epoch": 0.32, "grad_norm": 1.4126657968672724, "learning_rate": 7.911352742185373e-06, "loss": 0.6715, "step": 5048 }, { "epoch": 0.32, "grad_norm": 1.4573963880095002, "learning_rate": 7.910509965037045e-06, "loss": 0.6084, "step": 5049 }, { "epoch": 0.32, "grad_norm": 1.5572304819597766, "learning_rate": 7.909667062801114e-06, "loss": 0.6647, "step": 5050 }, { "epoch": 0.32, "grad_norm": 1.4326122854453365, "learning_rate": 7.90882403551381e-06, "loss": 0.722, "step": 5051 }, { "epoch": 0.32, "grad_norm": 1.4984489508585233, "learning_rate": 7.907980883211357e-06, "loss": 0.7873, "step": 5052 }, { "epoch": 0.32, "grad_norm": 1.365476749151826, "learning_rate": 7.907137605929998e-06, "loss": 0.6744, "step": 5053 }, { "epoch": 0.32, "grad_norm": 1.4552334437074563, "learning_rate": 7.906294203705973e-06, "loss": 0.6638, "step": 5054 }, { "epoch": 0.32, "grad_norm": 1.6579334984635923, "learning_rate": 7.90545067657553e-06, "loss": 0.7467, "step": 5055 }, { "epoch": 0.32, "grad_norm": 1.7405133900908754, "learning_rate": 7.904607024574923e-06, "loss": 0.7401, "step": 5056 }, { "epoch": 0.32, "grad_norm": 1.541713464785651, "learning_rate": 7.903763247740407e-06, "loss": 0.6425, "step": 5057 }, { "epoch": 0.32, "grad_norm": 1.0673905840678717, "learning_rate": 7.902919346108252e-06, "loss": 0.6274, "step": 5058 }, { "epoch": 0.32, "grad_norm": 1.2789683326104122, "learning_rate": 7.902075319714722e-06, "loss": 0.6439, "step": 5059 }, { "epoch": 0.32, "grad_norm": 1.7019765429213018, "learning_rate": 7.901231168596092e-06, "loss": 0.6508, "step": 5060 }, { "epoch": 0.32, "grad_norm": 1.865511515777814, "learning_rate": 7.900386892788644e-06, "loss": 0.6094, "step": 5061 }, { "epoch": 0.32, "grad_norm": 1.0868253506505254, "learning_rate": 7.89954249232866e-06, "loss": 0.5871, "step": 5062 }, { "epoch": 0.32, "grad_norm": 1.6723372813917192, "learning_rate": 7.898697967252433e-06, "loss": 0.6967, "step": 5063 }, { "epoch": 0.32, "grad_norm": 1.4966792474330548, "learning_rate": 7.89785331759626e-06, "loss": 0.7903, "step": 5064 }, { "epoch": 0.32, "grad_norm": 1.4897842158216692, "learning_rate": 7.897008543396438e-06, "loss": 0.7751, "step": 5065 }, { "epoch": 0.32, "grad_norm": 1.3084972197527425, "learning_rate": 7.896163644689278e-06, "loss": 0.626, "step": 5066 }, { "epoch": 0.32, "grad_norm": 1.5868703130630388, "learning_rate": 7.89531862151109e-06, "loss": 0.6534, "step": 5067 }, { "epoch": 0.32, "grad_norm": 1.4429657907271953, "learning_rate": 7.894473473898191e-06, "loss": 0.7738, "step": 5068 }, { "epoch": 0.32, "grad_norm": 1.0782723283460607, "learning_rate": 7.893628201886906e-06, "loss": 0.641, "step": 5069 }, { "epoch": 0.32, "grad_norm": 1.0760728423618815, "learning_rate": 7.89278280551356e-06, "loss": 0.7083, "step": 5070 }, { "epoch": 0.32, "grad_norm": 1.5409995536985355, "learning_rate": 7.891937284814489e-06, "loss": 0.7295, "step": 5071 }, { "epoch": 0.32, "grad_norm": 1.7292830900967069, "learning_rate": 7.891091639826027e-06, "loss": 0.7797, "step": 5072 }, { "epoch": 0.32, "grad_norm": 1.3818843148797448, "learning_rate": 7.890245870584523e-06, "loss": 0.7475, "step": 5073 }, { "epoch": 0.32, "grad_norm": 1.534650424800725, "learning_rate": 7.889399977126327e-06, "loss": 0.7365, "step": 5074 }, { "epoch": 0.32, "grad_norm": 1.1275487789874854, "learning_rate": 7.888553959487788e-06, "loss": 0.6982, "step": 5075 }, { "epoch": 0.32, "grad_norm": 1.728592191081903, "learning_rate": 7.887707817705272e-06, "loss": 0.743, "step": 5076 }, { "epoch": 0.32, "grad_norm": 1.7557591634103193, "learning_rate": 7.886861551815139e-06, "loss": 0.7508, "step": 5077 }, { "epoch": 0.33, "grad_norm": 1.396625497461402, "learning_rate": 7.886015161853766e-06, "loss": 0.7162, "step": 5078 }, { "epoch": 0.33, "grad_norm": 1.6956691040312666, "learning_rate": 7.885168647857523e-06, "loss": 0.6811, "step": 5079 }, { "epoch": 0.33, "grad_norm": 1.0500230215085453, "learning_rate": 7.884322009862796e-06, "loss": 0.7012, "step": 5080 }, { "epoch": 0.33, "grad_norm": 1.4921198428009026, "learning_rate": 7.88347524790597e-06, "loss": 0.7437, "step": 5081 }, { "epoch": 0.33, "grad_norm": 1.729618058411664, "learning_rate": 7.882628362023435e-06, "loss": 0.7284, "step": 5082 }, { "epoch": 0.33, "grad_norm": 1.283573282923992, "learning_rate": 7.881781352251591e-06, "loss": 0.6953, "step": 5083 }, { "epoch": 0.33, "grad_norm": 1.5039516279592835, "learning_rate": 7.880934218626841e-06, "loss": 0.6433, "step": 5084 }, { "epoch": 0.33, "grad_norm": 1.5077086175386571, "learning_rate": 7.88008696118559e-06, "loss": 0.7259, "step": 5085 }, { "epoch": 0.33, "grad_norm": 1.49775733464118, "learning_rate": 7.879239579964256e-06, "loss": 0.7302, "step": 5086 }, { "epoch": 0.33, "grad_norm": 1.4730744930653255, "learning_rate": 7.878392074999252e-06, "loss": 0.7369, "step": 5087 }, { "epoch": 0.33, "grad_norm": 1.5451436009307704, "learning_rate": 7.877544446327006e-06, "loss": 0.6321, "step": 5088 }, { "epoch": 0.33, "grad_norm": 1.1084932569615333, "learning_rate": 7.876696693983947e-06, "loss": 0.6108, "step": 5089 }, { "epoch": 0.33, "grad_norm": 1.3847137269760423, "learning_rate": 7.875848818006508e-06, "loss": 0.7032, "step": 5090 }, { "epoch": 0.33, "grad_norm": 1.7086453872902994, "learning_rate": 7.875000818431132e-06, "loss": 0.7591, "step": 5091 }, { "epoch": 0.33, "grad_norm": 1.5828086287957044, "learning_rate": 7.874152695294258e-06, "loss": 0.6059, "step": 5092 }, { "epoch": 0.33, "grad_norm": 1.3103187529514588, "learning_rate": 7.873304448632345e-06, "loss": 0.7775, "step": 5093 }, { "epoch": 0.33, "grad_norm": 1.498165307444128, "learning_rate": 7.87245607848184e-06, "loss": 0.7268, "step": 5094 }, { "epoch": 0.33, "grad_norm": 1.4740775631370306, "learning_rate": 7.871607584879211e-06, "loss": 0.6967, "step": 5095 }, { "epoch": 0.33, "grad_norm": 1.1569974208720846, "learning_rate": 7.870758967860923e-06, "loss": 0.6874, "step": 5096 }, { "epoch": 0.33, "grad_norm": 1.6513248554697042, "learning_rate": 7.869910227463447e-06, "loss": 0.7282, "step": 5097 }, { "epoch": 0.33, "grad_norm": 1.5201383697225048, "learning_rate": 7.869061363723256e-06, "loss": 0.7775, "step": 5098 }, { "epoch": 0.33, "grad_norm": 1.7131131605114185, "learning_rate": 7.86821237667684e-06, "loss": 0.6539, "step": 5099 }, { "epoch": 0.33, "grad_norm": 1.6054261950709152, "learning_rate": 7.867363266360682e-06, "loss": 0.6886, "step": 5100 }, { "epoch": 0.33, "grad_norm": 1.6136004328337474, "learning_rate": 7.866514032811276e-06, "loss": 0.6896, "step": 5101 }, { "epoch": 0.33, "grad_norm": 1.4452345518422414, "learning_rate": 7.865664676065122e-06, "loss": 0.6502, "step": 5102 }, { "epoch": 0.33, "grad_norm": 1.321881595675868, "learning_rate": 7.864815196158718e-06, "loss": 0.6749, "step": 5103 }, { "epoch": 0.33, "grad_norm": 1.496052905443436, "learning_rate": 7.86396559312858e-06, "loss": 0.6212, "step": 5104 }, { "epoch": 0.33, "grad_norm": 1.3735739830049611, "learning_rate": 7.863115867011217e-06, "loss": 0.6663, "step": 5105 }, { "epoch": 0.33, "grad_norm": 1.8191704969336266, "learning_rate": 7.86226601784315e-06, "loss": 0.7024, "step": 5106 }, { "epoch": 0.33, "grad_norm": 1.4254895320946719, "learning_rate": 7.861416045660906e-06, "loss": 0.68, "step": 5107 }, { "epoch": 0.33, "grad_norm": 1.579320067393965, "learning_rate": 7.860565950501012e-06, "loss": 0.6343, "step": 5108 }, { "epoch": 0.33, "grad_norm": 1.4844846462266428, "learning_rate": 7.859715732400004e-06, "loss": 0.7306, "step": 5109 }, { "epoch": 0.33, "grad_norm": 1.6809477761920804, "learning_rate": 7.858865391394422e-06, "loss": 0.6717, "step": 5110 }, { "epoch": 0.33, "grad_norm": 1.3922369141241737, "learning_rate": 7.858014927520815e-06, "loss": 0.7505, "step": 5111 }, { "epoch": 0.33, "grad_norm": 1.591997843625192, "learning_rate": 7.85716434081573e-06, "loss": 0.7178, "step": 5112 }, { "epoch": 0.33, "grad_norm": 1.52548214287017, "learning_rate": 7.856313631315726e-06, "loss": 0.7131, "step": 5113 }, { "epoch": 0.33, "grad_norm": 1.8006263249724233, "learning_rate": 7.855462799057364e-06, "loss": 0.7192, "step": 5114 }, { "epoch": 0.33, "grad_norm": 1.3987216116635068, "learning_rate": 7.854611844077213e-06, "loss": 0.6677, "step": 5115 }, { "epoch": 0.33, "grad_norm": 1.4573371118734668, "learning_rate": 7.853760766411841e-06, "loss": 0.8539, "step": 5116 }, { "epoch": 0.33, "grad_norm": 1.5575911409201575, "learning_rate": 7.852909566097828e-06, "loss": 0.6709, "step": 5117 }, { "epoch": 0.33, "grad_norm": 1.5939323912911287, "learning_rate": 7.852058243171757e-06, "loss": 0.644, "step": 5118 }, { "epoch": 0.33, "grad_norm": 1.5457716443608784, "learning_rate": 7.851206797670217e-06, "loss": 0.6158, "step": 5119 }, { "epoch": 0.33, "grad_norm": 1.5530570503385743, "learning_rate": 7.850355229629797e-06, "loss": 0.7458, "step": 5120 }, { "epoch": 0.33, "grad_norm": 1.7729705709876822, "learning_rate": 7.849503539087102e-06, "loss": 0.6974, "step": 5121 }, { "epoch": 0.33, "grad_norm": 1.7345356940711545, "learning_rate": 7.84865172607873e-06, "loss": 0.6944, "step": 5122 }, { "epoch": 0.33, "grad_norm": 1.6522427974284644, "learning_rate": 7.847799790641295e-06, "loss": 0.6143, "step": 5123 }, { "epoch": 0.33, "grad_norm": 1.5118627127962823, "learning_rate": 7.846947732811408e-06, "loss": 0.6935, "step": 5124 }, { "epoch": 0.33, "grad_norm": 1.345080348030526, "learning_rate": 7.846095552625688e-06, "loss": 0.6805, "step": 5125 }, { "epoch": 0.33, "grad_norm": 1.7837251787901758, "learning_rate": 7.845243250120764e-06, "loss": 0.7455, "step": 5126 }, { "epoch": 0.33, "grad_norm": 1.1067871124821302, "learning_rate": 7.844390825333264e-06, "loss": 0.6624, "step": 5127 }, { "epoch": 0.33, "grad_norm": 1.6243502943685768, "learning_rate": 7.84353827829982e-06, "loss": 0.7794, "step": 5128 }, { "epoch": 0.33, "grad_norm": 1.8096564927855974, "learning_rate": 7.842685609057078e-06, "loss": 0.7033, "step": 5129 }, { "epoch": 0.33, "grad_norm": 1.5548182347075477, "learning_rate": 7.841832817641682e-06, "loss": 0.6977, "step": 5130 }, { "epoch": 0.33, "grad_norm": 1.4923210810417709, "learning_rate": 7.840979904090282e-06, "loss": 0.6435, "step": 5131 }, { "epoch": 0.33, "grad_norm": 1.7484575869527983, "learning_rate": 7.840126868439537e-06, "loss": 0.7145, "step": 5132 }, { "epoch": 0.33, "grad_norm": 1.725229653998412, "learning_rate": 7.839273710726107e-06, "loss": 0.7799, "step": 5133 }, { "epoch": 0.33, "grad_norm": 1.489589707999246, "learning_rate": 7.838420430986658e-06, "loss": 0.6735, "step": 5134 }, { "epoch": 0.33, "grad_norm": 1.4103557800397661, "learning_rate": 7.837567029257864e-06, "loss": 0.814, "step": 5135 }, { "epoch": 0.33, "grad_norm": 1.52616233034365, "learning_rate": 7.836713505576402e-06, "loss": 0.6867, "step": 5136 }, { "epoch": 0.33, "grad_norm": 1.6350783075589748, "learning_rate": 7.835859859978952e-06, "loss": 0.7635, "step": 5137 }, { "epoch": 0.33, "grad_norm": 2.233740521632782, "learning_rate": 7.835006092502207e-06, "loss": 0.7173, "step": 5138 }, { "epoch": 0.33, "grad_norm": 1.5174387036545698, "learning_rate": 7.834152203182854e-06, "loss": 0.627, "step": 5139 }, { "epoch": 0.33, "grad_norm": 1.7064911347241134, "learning_rate": 7.833298192057598e-06, "loss": 0.7025, "step": 5140 }, { "epoch": 0.33, "grad_norm": 1.316856695672523, "learning_rate": 7.832444059163137e-06, "loss": 0.7681, "step": 5141 }, { "epoch": 0.33, "grad_norm": 1.0132455998614607, "learning_rate": 7.831589804536184e-06, "loss": 0.6102, "step": 5142 }, { "epoch": 0.33, "grad_norm": 1.017050259173357, "learning_rate": 7.830735428213451e-06, "loss": 0.5727, "step": 5143 }, { "epoch": 0.33, "grad_norm": 1.699451124899921, "learning_rate": 7.829880930231657e-06, "loss": 0.7305, "step": 5144 }, { "epoch": 0.33, "grad_norm": 2.1983157501373465, "learning_rate": 7.829026310627526e-06, "loss": 0.7148, "step": 5145 }, { "epoch": 0.33, "grad_norm": 1.7354735528601588, "learning_rate": 7.828171569437789e-06, "loss": 0.7396, "step": 5146 }, { "epoch": 0.33, "grad_norm": 1.6372189906265047, "learning_rate": 7.827316706699182e-06, "loss": 0.6588, "step": 5147 }, { "epoch": 0.33, "grad_norm": 4.13678776999825, "learning_rate": 7.826461722448445e-06, "loss": 0.7003, "step": 5148 }, { "epoch": 0.33, "grad_norm": 1.5295310245229505, "learning_rate": 7.825606616722319e-06, "loss": 0.6811, "step": 5149 }, { "epoch": 0.33, "grad_norm": 1.4656095788419685, "learning_rate": 7.82475138955756e-06, "loss": 0.6718, "step": 5150 }, { "epoch": 0.33, "grad_norm": 1.5588175548304999, "learning_rate": 7.823896040990922e-06, "loss": 0.6878, "step": 5151 }, { "epoch": 0.33, "grad_norm": 1.41472387671047, "learning_rate": 7.823040571059165e-06, "loss": 0.6543, "step": 5152 }, { "epoch": 0.33, "grad_norm": 1.2699075616281261, "learning_rate": 7.822184979799057e-06, "loss": 0.7568, "step": 5153 }, { "epoch": 0.33, "grad_norm": 1.526465683356843, "learning_rate": 7.821329267247369e-06, "loss": 0.681, "step": 5154 }, { "epoch": 0.33, "grad_norm": 1.6019746309288978, "learning_rate": 7.820473433440876e-06, "loss": 0.7405, "step": 5155 }, { "epoch": 0.33, "grad_norm": 1.5205216598374844, "learning_rate": 7.819617478416364e-06, "loss": 0.6632, "step": 5156 }, { "epoch": 0.33, "grad_norm": 1.5155734840363682, "learning_rate": 7.818761402210616e-06, "loss": 0.6506, "step": 5157 }, { "epoch": 0.33, "grad_norm": 1.1316151857618104, "learning_rate": 7.817905204860426e-06, "loss": 0.64, "step": 5158 }, { "epoch": 0.33, "grad_norm": 1.5404979175600781, "learning_rate": 7.817048886402591e-06, "loss": 0.7614, "step": 5159 }, { "epoch": 0.33, "grad_norm": 1.568299667123074, "learning_rate": 7.816192446873917e-06, "loss": 0.7025, "step": 5160 }, { "epoch": 0.33, "grad_norm": 1.5431927774296772, "learning_rate": 7.815335886311205e-06, "loss": 0.8055, "step": 5161 }, { "epoch": 0.33, "grad_norm": 1.6160390253649979, "learning_rate": 7.814479204751276e-06, "loss": 0.778, "step": 5162 }, { "epoch": 0.33, "grad_norm": 1.5724273592218196, "learning_rate": 7.813622402230943e-06, "loss": 0.6763, "step": 5163 }, { "epoch": 0.33, "grad_norm": 1.5783732461296451, "learning_rate": 7.812765478787034e-06, "loss": 0.6039, "step": 5164 }, { "epoch": 0.33, "grad_norm": 1.681192497254798, "learning_rate": 7.811908434456372e-06, "loss": 0.7616, "step": 5165 }, { "epoch": 0.33, "grad_norm": 1.668779213888126, "learning_rate": 7.811051269275795e-06, "loss": 0.6404, "step": 5166 }, { "epoch": 0.33, "grad_norm": 1.8139488009986195, "learning_rate": 7.810193983282142e-06, "loss": 0.6751, "step": 5167 }, { "epoch": 0.33, "grad_norm": 1.1149174682220007, "learning_rate": 7.809336576512257e-06, "loss": 0.6523, "step": 5168 }, { "epoch": 0.33, "grad_norm": 1.6693554113527194, "learning_rate": 7.808479049002988e-06, "loss": 0.7197, "step": 5169 }, { "epoch": 0.33, "grad_norm": 1.4245494556194327, "learning_rate": 7.80762140079119e-06, "loss": 0.7405, "step": 5170 }, { "epoch": 0.33, "grad_norm": 1.4165257935002489, "learning_rate": 7.806763631913726e-06, "loss": 0.7635, "step": 5171 }, { "epoch": 0.33, "grad_norm": 1.5391371126779823, "learning_rate": 7.805905742407458e-06, "loss": 0.7807, "step": 5172 }, { "epoch": 0.33, "grad_norm": 1.9568475514178185, "learning_rate": 7.805047732309257e-06, "loss": 0.8111, "step": 5173 }, { "epoch": 0.33, "grad_norm": 1.5151384099042529, "learning_rate": 7.804189601655999e-06, "loss": 0.8052, "step": 5174 }, { "epoch": 0.33, "grad_norm": 1.3941998649539167, "learning_rate": 7.803331350484563e-06, "loss": 0.7291, "step": 5175 }, { "epoch": 0.33, "grad_norm": 1.1079594385536595, "learning_rate": 7.802472978831838e-06, "loss": 0.6002, "step": 5176 }, { "epoch": 0.33, "grad_norm": 1.3517766018180475, "learning_rate": 7.801614486734712e-06, "loss": 0.6388, "step": 5177 }, { "epoch": 0.33, "grad_norm": 1.5758323263318323, "learning_rate": 7.800755874230084e-06, "loss": 0.7933, "step": 5178 }, { "epoch": 0.33, "grad_norm": 1.1393993147392032, "learning_rate": 7.799897141354854e-06, "loss": 0.6861, "step": 5179 }, { "epoch": 0.33, "grad_norm": 1.429253929981301, "learning_rate": 7.799038288145926e-06, "loss": 0.7152, "step": 5180 }, { "epoch": 0.33, "grad_norm": 1.3938665207363825, "learning_rate": 7.798179314640214e-06, "loss": 0.7121, "step": 5181 }, { "epoch": 0.33, "grad_norm": 1.6738023558620627, "learning_rate": 7.797320220874636e-06, "loss": 0.7091, "step": 5182 }, { "epoch": 0.33, "grad_norm": 1.8432422160200188, "learning_rate": 7.79646100688611e-06, "loss": 0.7953, "step": 5183 }, { "epoch": 0.33, "grad_norm": 1.5086280936793655, "learning_rate": 7.795601672711571e-06, "loss": 0.6961, "step": 5184 }, { "epoch": 0.33, "grad_norm": 1.4020996101300782, "learning_rate": 7.794742218387943e-06, "loss": 0.7333, "step": 5185 }, { "epoch": 0.33, "grad_norm": 1.6223547190208099, "learning_rate": 7.793882643952169e-06, "loss": 0.7517, "step": 5186 }, { "epoch": 0.33, "grad_norm": 1.6271287316349696, "learning_rate": 7.793022949441189e-06, "loss": 0.7474, "step": 5187 }, { "epoch": 0.33, "grad_norm": 1.5250411689882089, "learning_rate": 7.792163134891952e-06, "loss": 0.7628, "step": 5188 }, { "epoch": 0.33, "grad_norm": 1.3963947708412436, "learning_rate": 7.79130320034141e-06, "loss": 0.6489, "step": 5189 }, { "epoch": 0.33, "grad_norm": 1.4435512630455098, "learning_rate": 7.790443145826522e-06, "loss": 0.7038, "step": 5190 }, { "epoch": 0.33, "grad_norm": 1.6972236127460394, "learning_rate": 7.789582971384252e-06, "loss": 0.7602, "step": 5191 }, { "epoch": 0.33, "grad_norm": 1.454286290262103, "learning_rate": 7.788722677051565e-06, "loss": 0.7065, "step": 5192 }, { "epoch": 0.33, "grad_norm": 1.108878499424396, "learning_rate": 7.787862262865438e-06, "loss": 0.6593, "step": 5193 }, { "epoch": 0.33, "grad_norm": 2.1058039755513587, "learning_rate": 7.78700172886285e-06, "loss": 0.6864, "step": 5194 }, { "epoch": 0.33, "grad_norm": 1.153495629689261, "learning_rate": 7.786141075080786e-06, "loss": 0.7599, "step": 5195 }, { "epoch": 0.33, "grad_norm": 1.08588383354525, "learning_rate": 7.78528030155623e-06, "loss": 0.5426, "step": 5196 }, { "epoch": 0.33, "grad_norm": 1.566994139649479, "learning_rate": 7.784419408326181e-06, "loss": 0.7525, "step": 5197 }, { "epoch": 0.33, "grad_norm": 1.4501589673365243, "learning_rate": 7.783558395427636e-06, "loss": 0.7389, "step": 5198 }, { "epoch": 0.33, "grad_norm": 1.6688501594523777, "learning_rate": 7.782697262897601e-06, "loss": 0.7493, "step": 5199 }, { "epoch": 0.33, "grad_norm": 1.4973933802137265, "learning_rate": 7.781836010773082e-06, "loss": 0.5765, "step": 5200 }, { "epoch": 0.33, "grad_norm": 1.4693167946168895, "learning_rate": 7.7809746390911e-06, "loss": 0.6817, "step": 5201 }, { "epoch": 0.33, "grad_norm": 1.5996807002617859, "learning_rate": 7.78011314788867e-06, "loss": 0.7433, "step": 5202 }, { "epoch": 0.33, "grad_norm": 1.4312709713393363, "learning_rate": 7.77925153720282e-06, "loss": 0.6775, "step": 5203 }, { "epoch": 0.33, "grad_norm": 1.4700936537686682, "learning_rate": 7.778389807070578e-06, "loss": 0.7155, "step": 5204 }, { "epoch": 0.33, "grad_norm": 1.4705494903177878, "learning_rate": 7.777527957528982e-06, "loss": 0.7758, "step": 5205 }, { "epoch": 0.33, "grad_norm": 1.2356885836710338, "learning_rate": 7.776665988615066e-06, "loss": 0.5543, "step": 5206 }, { "epoch": 0.33, "grad_norm": 1.9481959947014178, "learning_rate": 7.775803900365885e-06, "loss": 0.6961, "step": 5207 }, { "epoch": 0.33, "grad_norm": 1.599363622527812, "learning_rate": 7.774941692818484e-06, "loss": 0.699, "step": 5208 }, { "epoch": 0.33, "grad_norm": 1.4567309373820654, "learning_rate": 7.77407936600992e-06, "loss": 0.6898, "step": 5209 }, { "epoch": 0.33, "grad_norm": 1.3876305267955837, "learning_rate": 7.773216919977254e-06, "loss": 0.66, "step": 5210 }, { "epoch": 0.33, "grad_norm": 1.5000747344346657, "learning_rate": 7.77235435475755e-06, "loss": 0.653, "step": 5211 }, { "epoch": 0.33, "grad_norm": 1.857814567180509, "learning_rate": 7.771491670387884e-06, "loss": 0.745, "step": 5212 }, { "epoch": 0.33, "grad_norm": 1.3374020490611185, "learning_rate": 7.77062886690533e-06, "loss": 0.7556, "step": 5213 }, { "epoch": 0.33, "grad_norm": 1.4405307183957206, "learning_rate": 7.769765944346967e-06, "loss": 0.6396, "step": 5214 }, { "epoch": 0.33, "grad_norm": 1.471813164209299, "learning_rate": 7.768902902749885e-06, "loss": 0.7325, "step": 5215 }, { "epoch": 0.33, "grad_norm": 1.5745392417885915, "learning_rate": 7.768039742151174e-06, "loss": 0.6074, "step": 5216 }, { "epoch": 0.33, "grad_norm": 1.5696895668661308, "learning_rate": 7.767176462587932e-06, "loss": 0.6756, "step": 5217 }, { "epoch": 0.33, "grad_norm": 1.6125801785123914, "learning_rate": 7.766313064097261e-06, "loss": 0.7138, "step": 5218 }, { "epoch": 0.33, "grad_norm": 1.3825072926475337, "learning_rate": 7.765449546716266e-06, "loss": 0.7201, "step": 5219 }, { "epoch": 0.33, "grad_norm": 1.580391319956623, "learning_rate": 7.76458591048206e-06, "loss": 0.7349, "step": 5220 }, { "epoch": 0.33, "grad_norm": 1.567929825392232, "learning_rate": 7.76372215543176e-06, "loss": 0.714, "step": 5221 }, { "epoch": 0.33, "grad_norm": 1.4779603441391385, "learning_rate": 7.762858281602492e-06, "loss": 0.6757, "step": 5222 }, { "epoch": 0.33, "grad_norm": 1.616996390632365, "learning_rate": 7.76199428903138e-06, "loss": 0.8016, "step": 5223 }, { "epoch": 0.33, "grad_norm": 1.549678325705114, "learning_rate": 7.761130177755556e-06, "loss": 0.7549, "step": 5224 }, { "epoch": 0.33, "grad_norm": 1.5868667540681074, "learning_rate": 7.76026594781216e-06, "loss": 0.6765, "step": 5225 }, { "epoch": 0.33, "grad_norm": 1.4466523123255046, "learning_rate": 7.759401599238334e-06, "loss": 0.6225, "step": 5226 }, { "epoch": 0.33, "grad_norm": 1.547885526128444, "learning_rate": 7.758537132071224e-06, "loss": 0.746, "step": 5227 }, { "epoch": 0.33, "grad_norm": 1.3852585271816644, "learning_rate": 7.757672546347984e-06, "loss": 0.6637, "step": 5228 }, { "epoch": 0.33, "grad_norm": 1.0514425289991618, "learning_rate": 7.756807842105774e-06, "loss": 0.5663, "step": 5229 }, { "epoch": 0.33, "grad_norm": 1.5139999656374201, "learning_rate": 7.755943019381756e-06, "loss": 0.741, "step": 5230 }, { "epoch": 0.33, "grad_norm": 1.0548461119844106, "learning_rate": 7.755078078213099e-06, "loss": 0.6564, "step": 5231 }, { "epoch": 0.33, "grad_norm": 1.4102565766367812, "learning_rate": 7.754213018636973e-06, "loss": 0.7321, "step": 5232 }, { "epoch": 0.33, "grad_norm": 1.0899302897076737, "learning_rate": 7.75334784069056e-06, "loss": 0.748, "step": 5233 }, { "epoch": 0.34, "grad_norm": 1.6225293825569913, "learning_rate": 7.752482544411045e-06, "loss": 0.6568, "step": 5234 }, { "epoch": 0.34, "grad_norm": 1.92365011309626, "learning_rate": 7.75161712983561e-06, "loss": 0.7094, "step": 5235 }, { "epoch": 0.34, "grad_norm": 1.4718766991055343, "learning_rate": 7.750751597001454e-06, "loss": 0.6423, "step": 5236 }, { "epoch": 0.34, "grad_norm": 1.2532503065477372, "learning_rate": 7.749885945945774e-06, "loss": 0.7771, "step": 5237 }, { "epoch": 0.34, "grad_norm": 1.7307529991261237, "learning_rate": 7.749020176705775e-06, "loss": 0.6726, "step": 5238 }, { "epoch": 0.34, "grad_norm": 1.461080815119019, "learning_rate": 7.748154289318666e-06, "loss": 0.6548, "step": 5239 }, { "epoch": 0.34, "grad_norm": 1.7832837224256413, "learning_rate": 7.747288283821659e-06, "loss": 0.6999, "step": 5240 }, { "epoch": 0.34, "grad_norm": 1.616637079446525, "learning_rate": 7.746422160251975e-06, "loss": 0.7775, "step": 5241 }, { "epoch": 0.34, "grad_norm": 1.1913106932418993, "learning_rate": 7.745555918646836e-06, "loss": 0.6092, "step": 5242 }, { "epoch": 0.34, "grad_norm": 1.518650145154018, "learning_rate": 7.744689559043475e-06, "loss": 0.7338, "step": 5243 }, { "epoch": 0.34, "grad_norm": 1.0133147708399761, "learning_rate": 7.743823081479125e-06, "loss": 0.6548, "step": 5244 }, { "epoch": 0.34, "grad_norm": 1.5093253742369208, "learning_rate": 7.742956485991022e-06, "loss": 0.6949, "step": 5245 }, { "epoch": 0.34, "grad_norm": 1.4016578389669403, "learning_rate": 7.742089772616415e-06, "loss": 0.6973, "step": 5246 }, { "epoch": 0.34, "grad_norm": 1.6724646184948249, "learning_rate": 7.74122294139255e-06, "loss": 0.6984, "step": 5247 }, { "epoch": 0.34, "grad_norm": 1.5640965470988355, "learning_rate": 7.740355992356682e-06, "loss": 0.698, "step": 5248 }, { "epoch": 0.34, "grad_norm": 1.7245414955852412, "learning_rate": 7.739488925546074e-06, "loss": 0.6981, "step": 5249 }, { "epoch": 0.34, "grad_norm": 1.069968486795371, "learning_rate": 7.73862174099799e-06, "loss": 0.7249, "step": 5250 }, { "epoch": 0.34, "grad_norm": 1.4853293974073698, "learning_rate": 7.737754438749694e-06, "loss": 0.661, "step": 5251 }, { "epoch": 0.34, "grad_norm": 1.1966428516669552, "learning_rate": 7.736887018838467e-06, "loss": 0.6107, "step": 5252 }, { "epoch": 0.34, "grad_norm": 1.4990972738817696, "learning_rate": 7.736019481301587e-06, "loss": 0.7132, "step": 5253 }, { "epoch": 0.34, "grad_norm": 1.4743577350916077, "learning_rate": 7.735151826176339e-06, "loss": 0.6297, "step": 5254 }, { "epoch": 0.34, "grad_norm": 1.639275607513311, "learning_rate": 7.734284053500015e-06, "loss": 0.7603, "step": 5255 }, { "epoch": 0.34, "grad_norm": 1.4297623080466455, "learning_rate": 7.733416163309904e-06, "loss": 0.7242, "step": 5256 }, { "epoch": 0.34, "grad_norm": 1.4458475421354893, "learning_rate": 7.732548155643311e-06, "loss": 0.7508, "step": 5257 }, { "epoch": 0.34, "grad_norm": 1.470003894100569, "learning_rate": 7.731680030537541e-06, "loss": 0.7999, "step": 5258 }, { "epoch": 0.34, "grad_norm": 1.6103973660185684, "learning_rate": 7.730811788029903e-06, "loss": 0.6988, "step": 5259 }, { "epoch": 0.34, "grad_norm": 1.7007578128632443, "learning_rate": 7.72994342815771e-06, "loss": 0.6817, "step": 5260 }, { "epoch": 0.34, "grad_norm": 1.1399309783309541, "learning_rate": 7.72907495095829e-06, "loss": 0.6851, "step": 5261 }, { "epoch": 0.34, "grad_norm": 1.4160243627522913, "learning_rate": 7.728206356468962e-06, "loss": 0.7377, "step": 5262 }, { "epoch": 0.34, "grad_norm": 1.3964584089188736, "learning_rate": 7.727337644727055e-06, "loss": 0.7165, "step": 5263 }, { "epoch": 0.34, "grad_norm": 1.1880817731073654, "learning_rate": 7.726468815769907e-06, "loss": 0.7427, "step": 5264 }, { "epoch": 0.34, "grad_norm": 1.421394051582415, "learning_rate": 7.725599869634861e-06, "loss": 0.6559, "step": 5265 }, { "epoch": 0.34, "grad_norm": 1.449463568659789, "learning_rate": 7.724730806359258e-06, "loss": 0.6785, "step": 5266 }, { "epoch": 0.34, "grad_norm": 1.4416912059152798, "learning_rate": 7.72386162598045e-06, "loss": 0.624, "step": 5267 }, { "epoch": 0.34, "grad_norm": 1.628739469855019, "learning_rate": 7.722992328535795e-06, "loss": 0.7674, "step": 5268 }, { "epoch": 0.34, "grad_norm": 1.5310690977808419, "learning_rate": 7.722122914062649e-06, "loss": 0.7731, "step": 5269 }, { "epoch": 0.34, "grad_norm": 1.5305133464506402, "learning_rate": 7.721253382598382e-06, "loss": 0.7497, "step": 5270 }, { "epoch": 0.34, "grad_norm": 1.4201454531543949, "learning_rate": 7.720383734180362e-06, "loss": 0.7015, "step": 5271 }, { "epoch": 0.34, "grad_norm": 1.5462537116139106, "learning_rate": 7.719513968845967e-06, "loss": 0.6338, "step": 5272 }, { "epoch": 0.34, "grad_norm": 1.6833639081912122, "learning_rate": 7.718644086632575e-06, "loss": 0.7824, "step": 5273 }, { "epoch": 0.34, "grad_norm": 1.6631254446170343, "learning_rate": 7.717774087577572e-06, "loss": 0.6774, "step": 5274 }, { "epoch": 0.34, "grad_norm": 2.201937632359628, "learning_rate": 7.71690397171835e-06, "loss": 0.6986, "step": 5275 }, { "epoch": 0.34, "grad_norm": 1.5784611123568624, "learning_rate": 7.716033739092304e-06, "loss": 0.6392, "step": 5276 }, { "epoch": 0.34, "grad_norm": 1.5871316841648448, "learning_rate": 7.715163389736834e-06, "loss": 0.7601, "step": 5277 }, { "epoch": 0.34, "grad_norm": 1.3844855706004642, "learning_rate": 7.714292923689347e-06, "loss": 0.7422, "step": 5278 }, { "epoch": 0.34, "grad_norm": 1.21367339876086, "learning_rate": 7.713422340987258e-06, "loss": 0.677, "step": 5279 }, { "epoch": 0.34, "grad_norm": 1.5943108520927896, "learning_rate": 7.712551641667975e-06, "loss": 0.7039, "step": 5280 }, { "epoch": 0.34, "grad_norm": 1.2711913527734398, "learning_rate": 7.711680825768922e-06, "loss": 0.7268, "step": 5281 }, { "epoch": 0.34, "grad_norm": 1.8277759336307824, "learning_rate": 7.710809893327527e-06, "loss": 0.7698, "step": 5282 }, { "epoch": 0.34, "grad_norm": 1.1117340223745464, "learning_rate": 7.709938844381217e-06, "loss": 0.5923, "step": 5283 }, { "epoch": 0.34, "grad_norm": 1.6050456783633549, "learning_rate": 7.70906767896743e-06, "loss": 0.7414, "step": 5284 }, { "epoch": 0.34, "grad_norm": 1.3124329337470824, "learning_rate": 7.708196397123607e-06, "loss": 0.6603, "step": 5285 }, { "epoch": 0.34, "grad_norm": 1.4800654808825544, "learning_rate": 7.707324998887193e-06, "loss": 0.8084, "step": 5286 }, { "epoch": 0.34, "grad_norm": 1.5505999776497978, "learning_rate": 7.706453484295643e-06, "loss": 0.7549, "step": 5287 }, { "epoch": 0.34, "grad_norm": 1.4864769935474522, "learning_rate": 7.705581853386405e-06, "loss": 0.7565, "step": 5288 }, { "epoch": 0.34, "grad_norm": 1.106000196029309, "learning_rate": 7.704710106196947e-06, "loss": 0.6025, "step": 5289 }, { "epoch": 0.34, "grad_norm": 1.4278221952943355, "learning_rate": 7.70383824276473e-06, "loss": 0.6332, "step": 5290 }, { "epoch": 0.34, "grad_norm": 1.4334700396194937, "learning_rate": 7.702966263127227e-06, "loss": 0.6339, "step": 5291 }, { "epoch": 0.34, "grad_norm": 1.5876452072691274, "learning_rate": 7.702094167321915e-06, "loss": 0.6829, "step": 5292 }, { "epoch": 0.34, "grad_norm": 1.4177940651924794, "learning_rate": 7.701221955386273e-06, "loss": 0.6947, "step": 5293 }, { "epoch": 0.34, "grad_norm": 1.6334554818448244, "learning_rate": 7.700349627357785e-06, "loss": 0.6382, "step": 5294 }, { "epoch": 0.34, "grad_norm": 1.4344283018423636, "learning_rate": 7.699477183273949e-06, "loss": 0.5968, "step": 5295 }, { "epoch": 0.34, "grad_norm": 1.4801619791395058, "learning_rate": 7.698604623172253e-06, "loss": 0.6914, "step": 5296 }, { "epoch": 0.34, "grad_norm": 1.6951771708425816, "learning_rate": 7.6977319470902e-06, "loss": 0.7418, "step": 5297 }, { "epoch": 0.34, "grad_norm": 1.1641765500029921, "learning_rate": 7.696859155065299e-06, "loss": 0.6116, "step": 5298 }, { "epoch": 0.34, "grad_norm": 1.4408319981713986, "learning_rate": 7.695986247135058e-06, "loss": 0.6483, "step": 5299 }, { "epoch": 0.34, "grad_norm": 1.497822449134965, "learning_rate": 7.695113223336993e-06, "loss": 0.7145, "step": 5300 }, { "epoch": 0.34, "grad_norm": 1.5493808037364902, "learning_rate": 7.694240083708623e-06, "loss": 0.7456, "step": 5301 }, { "epoch": 0.34, "grad_norm": 1.4123628219207094, "learning_rate": 7.693366828287478e-06, "loss": 0.7359, "step": 5302 }, { "epoch": 0.34, "grad_norm": 1.8781611069215318, "learning_rate": 7.692493457111085e-06, "loss": 0.703, "step": 5303 }, { "epoch": 0.34, "grad_norm": 1.5818087875542413, "learning_rate": 7.69161997021698e-06, "loss": 0.6673, "step": 5304 }, { "epoch": 0.34, "grad_norm": 1.5411226380955052, "learning_rate": 7.690746367642707e-06, "loss": 0.6906, "step": 5305 }, { "epoch": 0.34, "grad_norm": 1.4151261376690671, "learning_rate": 7.68987264942581e-06, "loss": 0.7316, "step": 5306 }, { "epoch": 0.34, "grad_norm": 1.6539921665308395, "learning_rate": 7.688998815603837e-06, "loss": 0.6422, "step": 5307 }, { "epoch": 0.34, "grad_norm": 1.2592076020809198, "learning_rate": 7.688124866214345e-06, "loss": 0.6852, "step": 5308 }, { "epoch": 0.34, "grad_norm": 1.496764990280387, "learning_rate": 7.687250801294893e-06, "loss": 0.7088, "step": 5309 }, { "epoch": 0.34, "grad_norm": 1.5583026105101272, "learning_rate": 7.686376620883052e-06, "loss": 0.6872, "step": 5310 }, { "epoch": 0.34, "grad_norm": 0.999424474970324, "learning_rate": 7.685502325016384e-06, "loss": 0.6661, "step": 5311 }, { "epoch": 0.34, "grad_norm": 1.806146277650428, "learning_rate": 7.684627913732473e-06, "loss": 0.8009, "step": 5312 }, { "epoch": 0.34, "grad_norm": 1.5016319368427338, "learning_rate": 7.683753387068894e-06, "loss": 0.735, "step": 5313 }, { "epoch": 0.34, "grad_norm": 1.0460576154264645, "learning_rate": 7.682878745063235e-06, "loss": 0.7425, "step": 5314 }, { "epoch": 0.34, "grad_norm": 2.3632349019343493, "learning_rate": 7.682003987753082e-06, "loss": 0.6921, "step": 5315 }, { "epoch": 0.34, "grad_norm": 1.9012015591033036, "learning_rate": 7.681129115176036e-06, "loss": 0.6678, "step": 5316 }, { "epoch": 0.34, "grad_norm": 1.7561704377290586, "learning_rate": 7.680254127369695e-06, "loss": 0.6812, "step": 5317 }, { "epoch": 0.34, "grad_norm": 3.0827311558352437, "learning_rate": 7.679379024371663e-06, "loss": 0.6862, "step": 5318 }, { "epoch": 0.34, "grad_norm": 1.50569282563038, "learning_rate": 7.67850380621955e-06, "loss": 0.7408, "step": 5319 }, { "epoch": 0.34, "grad_norm": 1.476886130933477, "learning_rate": 7.67762847295097e-06, "loss": 0.7713, "step": 5320 }, { "epoch": 0.34, "grad_norm": 1.5627361553616828, "learning_rate": 7.676753024603546e-06, "loss": 0.6977, "step": 5321 }, { "epoch": 0.34, "grad_norm": 1.4060727298173095, "learning_rate": 7.675877461214904e-06, "loss": 0.6598, "step": 5322 }, { "epoch": 0.34, "grad_norm": 1.6357850698030818, "learning_rate": 7.675001782822669e-06, "loss": 0.7652, "step": 5323 }, { "epoch": 0.34, "grad_norm": 1.4600124567661679, "learning_rate": 7.674125989464478e-06, "loss": 0.7203, "step": 5324 }, { "epoch": 0.34, "grad_norm": 1.5276693414903457, "learning_rate": 7.673250081177974e-06, "loss": 0.7546, "step": 5325 }, { "epoch": 0.34, "grad_norm": 1.670376727460352, "learning_rate": 7.672374058000795e-06, "loss": 0.7197, "step": 5326 }, { "epoch": 0.34, "grad_norm": 1.6357546289108091, "learning_rate": 7.671497919970598e-06, "loss": 0.6964, "step": 5327 }, { "epoch": 0.34, "grad_norm": 1.5758667848679797, "learning_rate": 7.670621667125031e-06, "loss": 0.7377, "step": 5328 }, { "epoch": 0.34, "grad_norm": 1.1530169062663869, "learning_rate": 7.669745299501758e-06, "loss": 0.5539, "step": 5329 }, { "epoch": 0.34, "grad_norm": 1.5068529989271575, "learning_rate": 7.668868817138442e-06, "loss": 0.7285, "step": 5330 }, { "epoch": 0.34, "grad_norm": 1.4821546479256313, "learning_rate": 7.667992220072751e-06, "loss": 0.5961, "step": 5331 }, { "epoch": 0.34, "grad_norm": 1.2216157622379669, "learning_rate": 7.667115508342362e-06, "loss": 0.6932, "step": 5332 }, { "epoch": 0.34, "grad_norm": 1.0834940925862206, "learning_rate": 7.666238681984952e-06, "loss": 0.588, "step": 5333 }, { "epoch": 0.34, "grad_norm": 1.7688727105870072, "learning_rate": 7.665361741038207e-06, "loss": 0.7502, "step": 5334 }, { "epoch": 0.34, "grad_norm": 1.3950490925016619, "learning_rate": 7.664484685539814e-06, "loss": 0.7317, "step": 5335 }, { "epoch": 0.34, "grad_norm": 1.8666520813606735, "learning_rate": 7.663607515527469e-06, "loss": 0.7515, "step": 5336 }, { "epoch": 0.34, "grad_norm": 1.5886560887942442, "learning_rate": 7.662730231038869e-06, "loss": 0.6936, "step": 5337 }, { "epoch": 0.34, "grad_norm": 1.7331113555877367, "learning_rate": 7.66185283211172e-06, "loss": 0.6922, "step": 5338 }, { "epoch": 0.34, "grad_norm": 1.4830309257535723, "learning_rate": 7.66097531878373e-06, "loss": 0.7384, "step": 5339 }, { "epoch": 0.34, "grad_norm": 1.416421293614683, "learning_rate": 7.66009769109261e-06, "loss": 0.6919, "step": 5340 }, { "epoch": 0.34, "grad_norm": 1.4164477193008662, "learning_rate": 7.659219949076082e-06, "loss": 0.6254, "step": 5341 }, { "epoch": 0.34, "grad_norm": 1.5807131296492898, "learning_rate": 7.658342092771869e-06, "loss": 0.7286, "step": 5342 }, { "epoch": 0.34, "grad_norm": 2.0961766231173518, "learning_rate": 7.657464122217699e-06, "loss": 0.653, "step": 5343 }, { "epoch": 0.34, "grad_norm": 1.429755051713098, "learning_rate": 7.656586037451303e-06, "loss": 0.7519, "step": 5344 }, { "epoch": 0.34, "grad_norm": 0.9907417487076075, "learning_rate": 7.655707838510423e-06, "loss": 0.6038, "step": 5345 }, { "epoch": 0.34, "grad_norm": 1.633478445266843, "learning_rate": 7.6548295254328e-06, "loss": 0.6566, "step": 5346 }, { "epoch": 0.34, "grad_norm": 2.0492063701656242, "learning_rate": 7.653951098256184e-06, "loss": 0.7542, "step": 5347 }, { "epoch": 0.34, "grad_norm": 1.4479961389667615, "learning_rate": 7.653072557018325e-06, "loss": 0.6849, "step": 5348 }, { "epoch": 0.34, "grad_norm": 1.525727646007557, "learning_rate": 7.652193901756983e-06, "loss": 0.6078, "step": 5349 }, { "epoch": 0.34, "grad_norm": 1.6009891319995702, "learning_rate": 7.65131513250992e-06, "loss": 0.7744, "step": 5350 }, { "epoch": 0.34, "grad_norm": 1.469811797547325, "learning_rate": 7.650436249314902e-06, "loss": 0.6802, "step": 5351 }, { "epoch": 0.34, "grad_norm": 1.5784892273906186, "learning_rate": 7.649557252209706e-06, "loss": 0.6833, "step": 5352 }, { "epoch": 0.34, "grad_norm": 1.7360917611610038, "learning_rate": 7.648678141232107e-06, "loss": 0.6807, "step": 5353 }, { "epoch": 0.34, "grad_norm": 1.508573043414473, "learning_rate": 7.647798916419885e-06, "loss": 0.627, "step": 5354 }, { "epoch": 0.34, "grad_norm": 1.4502573296047017, "learning_rate": 7.646919577810831e-06, "loss": 0.6521, "step": 5355 }, { "epoch": 0.34, "grad_norm": 1.7496494497862782, "learning_rate": 7.646040125442736e-06, "loss": 0.7342, "step": 5356 }, { "epoch": 0.34, "grad_norm": 1.168225111967063, "learning_rate": 7.645160559353392e-06, "loss": 0.6853, "step": 5357 }, { "epoch": 0.34, "grad_norm": 1.579305787507406, "learning_rate": 7.64428087958061e-06, "loss": 0.6736, "step": 5358 }, { "epoch": 0.34, "grad_norm": 1.2755767271669476, "learning_rate": 7.64340108616219e-06, "loss": 0.6616, "step": 5359 }, { "epoch": 0.34, "grad_norm": 1.4997780687230757, "learning_rate": 7.642521179135946e-06, "loss": 0.6589, "step": 5360 }, { "epoch": 0.34, "grad_norm": 1.8929232123524016, "learning_rate": 7.641641158539696e-06, "loss": 0.7017, "step": 5361 }, { "epoch": 0.34, "grad_norm": 1.5371220488202135, "learning_rate": 7.64076102441126e-06, "loss": 0.7252, "step": 5362 }, { "epoch": 0.34, "grad_norm": 1.5980878744476903, "learning_rate": 7.639880776788464e-06, "loss": 0.6771, "step": 5363 }, { "epoch": 0.34, "grad_norm": 1.158637468473046, "learning_rate": 7.639000415709138e-06, "loss": 0.652, "step": 5364 }, { "epoch": 0.34, "grad_norm": 1.5172286871629797, "learning_rate": 7.63811994121112e-06, "loss": 0.6976, "step": 5365 }, { "epoch": 0.34, "grad_norm": 1.463558135882178, "learning_rate": 7.63723935333225e-06, "loss": 0.6868, "step": 5366 }, { "epoch": 0.34, "grad_norm": 1.3225581463669354, "learning_rate": 7.636358652110375e-06, "loss": 0.6062, "step": 5367 }, { "epoch": 0.34, "grad_norm": 1.6159883448973489, "learning_rate": 7.635477837583343e-06, "loss": 0.7682, "step": 5368 }, { "epoch": 0.34, "grad_norm": 1.4433919299494828, "learning_rate": 7.634596909789014e-06, "loss": 0.6738, "step": 5369 }, { "epoch": 0.34, "grad_norm": 1.3236719188226185, "learning_rate": 7.633715868765245e-06, "loss": 0.6756, "step": 5370 }, { "epoch": 0.34, "grad_norm": 1.6975958413861918, "learning_rate": 7.632834714549902e-06, "loss": 0.718, "step": 5371 }, { "epoch": 0.34, "grad_norm": 1.5370331064259835, "learning_rate": 7.631953447180855e-06, "loss": 0.7427, "step": 5372 }, { "epoch": 0.34, "grad_norm": 1.7311830491836624, "learning_rate": 7.63107206669598e-06, "loss": 0.8123, "step": 5373 }, { "epoch": 0.34, "grad_norm": 1.5250640186597493, "learning_rate": 7.630190573133156e-06, "loss": 0.7206, "step": 5374 }, { "epoch": 0.34, "grad_norm": 1.4813273661488626, "learning_rate": 7.629308966530268e-06, "loss": 0.7313, "step": 5375 }, { "epoch": 0.34, "grad_norm": 1.839421793540649, "learning_rate": 7.628427246925205e-06, "loss": 0.8008, "step": 5376 }, { "epoch": 0.34, "grad_norm": 1.4710896514081657, "learning_rate": 7.6275454143558614e-06, "loss": 0.6707, "step": 5377 }, { "epoch": 0.34, "grad_norm": 1.4310867578606687, "learning_rate": 7.6266634688601385e-06, "loss": 0.6677, "step": 5378 }, { "epoch": 0.34, "grad_norm": 1.5557909912181789, "learning_rate": 7.6257814104759385e-06, "loss": 0.6536, "step": 5379 }, { "epoch": 0.34, "grad_norm": 1.4900282116169163, "learning_rate": 7.6248992392411705e-06, "loss": 0.6535, "step": 5380 }, { "epoch": 0.34, "grad_norm": 1.4332559047842588, "learning_rate": 7.62401695519375e-06, "loss": 0.7057, "step": 5381 }, { "epoch": 0.34, "grad_norm": 1.5973354052802236, "learning_rate": 7.623134558371594e-06, "loss": 0.7261, "step": 5382 }, { "epoch": 0.34, "grad_norm": 1.462550486657417, "learning_rate": 7.6222520488126286e-06, "loss": 0.6592, "step": 5383 }, { "epoch": 0.34, "grad_norm": 1.3750595729376065, "learning_rate": 7.621369426554778e-06, "loss": 0.6816, "step": 5384 }, { "epoch": 0.34, "grad_norm": 1.542477458823083, "learning_rate": 7.620486691635978e-06, "loss": 0.7106, "step": 5385 }, { "epoch": 0.34, "grad_norm": 1.4889821863945907, "learning_rate": 7.619603844094165e-06, "loss": 0.6732, "step": 5386 }, { "epoch": 0.34, "grad_norm": 1.5563037089107337, "learning_rate": 7.618720883967285e-06, "loss": 0.7014, "step": 5387 }, { "epoch": 0.34, "grad_norm": 1.4793039673844308, "learning_rate": 7.6178378112932845e-06, "loss": 0.7085, "step": 5388 }, { "epoch": 0.34, "grad_norm": 1.4416956907583907, "learning_rate": 7.616954626110116e-06, "loss": 0.6892, "step": 5389 }, { "epoch": 0.34, "grad_norm": 1.671101626235601, "learning_rate": 7.616071328455735e-06, "loss": 0.8137, "step": 5390 }, { "epoch": 0.35, "grad_norm": 1.5803926168595035, "learning_rate": 7.615187918368107e-06, "loss": 0.5835, "step": 5391 }, { "epoch": 0.35, "grad_norm": 1.7252454515043445, "learning_rate": 7.614304395885197e-06, "loss": 0.6768, "step": 5392 }, { "epoch": 0.35, "grad_norm": 1.7066139737164743, "learning_rate": 7.613420761044976e-06, "loss": 0.7633, "step": 5393 }, { "epoch": 0.35, "grad_norm": 1.3822051503373927, "learning_rate": 7.612537013885424e-06, "loss": 0.709, "step": 5394 }, { "epoch": 0.35, "grad_norm": 1.1415560142453685, "learning_rate": 7.611653154444521e-06, "loss": 0.6598, "step": 5395 }, { "epoch": 0.35, "grad_norm": 1.4487765570288167, "learning_rate": 7.610769182760251e-06, "loss": 0.7263, "step": 5396 }, { "epoch": 0.35, "grad_norm": 1.7393440696743434, "learning_rate": 7.60988509887061e-06, "loss": 0.7909, "step": 5397 }, { "epoch": 0.35, "grad_norm": 1.9548756260870792, "learning_rate": 7.60900090281359e-06, "loss": 0.7161, "step": 5398 }, { "epoch": 0.35, "grad_norm": 1.5432832518584623, "learning_rate": 7.608116594627195e-06, "loss": 0.7826, "step": 5399 }, { "epoch": 0.35, "grad_norm": 1.5179846720811274, "learning_rate": 7.607232174349428e-06, "loss": 0.663, "step": 5400 }, { "epoch": 0.35, "grad_norm": 1.1691444604328194, "learning_rate": 7.606347642018301e-06, "loss": 0.6672, "step": 5401 }, { "epoch": 0.35, "grad_norm": 1.4524614602208263, "learning_rate": 7.605462997671828e-06, "loss": 0.7767, "step": 5402 }, { "epoch": 0.35, "grad_norm": 1.5716877246493282, "learning_rate": 7.604578241348033e-06, "loss": 0.7341, "step": 5403 }, { "epoch": 0.35, "grad_norm": 1.014643010241256, "learning_rate": 7.603693373084936e-06, "loss": 0.5989, "step": 5404 }, { "epoch": 0.35, "grad_norm": 1.7777500560567545, "learning_rate": 7.602808392920568e-06, "loss": 0.7875, "step": 5405 }, { "epoch": 0.35, "grad_norm": 1.1364996190544556, "learning_rate": 7.601923300892966e-06, "loss": 0.6811, "step": 5406 }, { "epoch": 0.35, "grad_norm": 1.4797731743117146, "learning_rate": 7.601038097040169e-06, "loss": 0.6469, "step": 5407 }, { "epoch": 0.35, "grad_norm": 1.4644496839006993, "learning_rate": 7.600152781400218e-06, "loss": 0.69, "step": 5408 }, { "epoch": 0.35, "grad_norm": 1.8560098274649768, "learning_rate": 7.599267354011166e-06, "loss": 0.6827, "step": 5409 }, { "epoch": 0.35, "grad_norm": 1.4448115258376577, "learning_rate": 7.598381814911063e-06, "loss": 0.797, "step": 5410 }, { "epoch": 0.35, "grad_norm": 1.2503164121733275, "learning_rate": 7.597496164137972e-06, "loss": 0.7552, "step": 5411 }, { "epoch": 0.35, "grad_norm": 1.2098391830531707, "learning_rate": 7.596610401729951e-06, "loss": 0.748, "step": 5412 }, { "epoch": 0.35, "grad_norm": 1.5738461607262335, "learning_rate": 7.595724527725074e-06, "loss": 0.7313, "step": 5413 }, { "epoch": 0.35, "grad_norm": 1.4941489875215073, "learning_rate": 7.594838542161409e-06, "loss": 0.6449, "step": 5414 }, { "epoch": 0.35, "grad_norm": 1.0796237080419566, "learning_rate": 7.593952445077035e-06, "loss": 0.6576, "step": 5415 }, { "epoch": 0.35, "grad_norm": 1.3011457395007469, "learning_rate": 7.5930662365100385e-06, "loss": 0.6357, "step": 5416 }, { "epoch": 0.35, "grad_norm": 1.4687399596757946, "learning_rate": 7.592179916498503e-06, "loss": 0.7193, "step": 5417 }, { "epoch": 0.35, "grad_norm": 1.3913807153583677, "learning_rate": 7.591293485080522e-06, "loss": 0.6382, "step": 5418 }, { "epoch": 0.35, "grad_norm": 1.2494671830748403, "learning_rate": 7.590406942294191e-06, "loss": 0.7026, "step": 5419 }, { "epoch": 0.35, "grad_norm": 1.587092662346476, "learning_rate": 7.589520288177614e-06, "loss": 0.7469, "step": 5420 }, { "epoch": 0.35, "grad_norm": 1.5560896610133932, "learning_rate": 7.588633522768896e-06, "loss": 0.6635, "step": 5421 }, { "epoch": 0.35, "grad_norm": 1.6227828504084496, "learning_rate": 7.587746646106147e-06, "loss": 0.7027, "step": 5422 }, { "epoch": 0.35, "grad_norm": 1.1885548093287421, "learning_rate": 7.586859658227487e-06, "loss": 0.6021, "step": 5423 }, { "epoch": 0.35, "grad_norm": 1.5269805886838639, "learning_rate": 7.585972559171033e-06, "loss": 0.7337, "step": 5424 }, { "epoch": 0.35, "grad_norm": 1.4322456054592219, "learning_rate": 7.585085348974912e-06, "loss": 0.6506, "step": 5425 }, { "epoch": 0.35, "grad_norm": 1.180659740472977, "learning_rate": 7.584198027677256e-06, "loss": 0.7249, "step": 5426 }, { "epoch": 0.35, "grad_norm": 1.441238912823567, "learning_rate": 7.5833105953161986e-06, "loss": 0.674, "step": 5427 }, { "epoch": 0.35, "grad_norm": 1.6479880200658197, "learning_rate": 7.5824230519298816e-06, "loss": 0.6598, "step": 5428 }, { "epoch": 0.35, "grad_norm": 1.4744633818524848, "learning_rate": 7.581535397556445e-06, "loss": 0.6601, "step": 5429 }, { "epoch": 0.35, "grad_norm": 1.3930755758328714, "learning_rate": 7.5806476322340436e-06, "loss": 0.699, "step": 5430 }, { "epoch": 0.35, "grad_norm": 1.5246390573594755, "learning_rate": 7.579759756000829e-06, "loss": 0.6687, "step": 5431 }, { "epoch": 0.35, "grad_norm": 1.487535054134238, "learning_rate": 7.578871768894962e-06, "loss": 0.6621, "step": 5432 }, { "epoch": 0.35, "grad_norm": 1.8042881666140207, "learning_rate": 7.577983670954604e-06, "loss": 0.7029, "step": 5433 }, { "epoch": 0.35, "grad_norm": 1.50640755784876, "learning_rate": 7.577095462217926e-06, "loss": 0.6275, "step": 5434 }, { "epoch": 0.35, "grad_norm": 1.5686308377708609, "learning_rate": 7.5762071427231e-06, "loss": 0.8026, "step": 5435 }, { "epoch": 0.35, "grad_norm": 1.613489385691158, "learning_rate": 7.575318712508304e-06, "loss": 0.7089, "step": 5436 }, { "epoch": 0.35, "grad_norm": 1.5648134342435662, "learning_rate": 7.5744301716117206e-06, "loss": 0.7324, "step": 5437 }, { "epoch": 0.35, "grad_norm": 1.511816965026153, "learning_rate": 7.573541520071539e-06, "loss": 0.7007, "step": 5438 }, { "epoch": 0.35, "grad_norm": 1.69200701878625, "learning_rate": 7.572652757925951e-06, "loss": 0.7356, "step": 5439 }, { "epoch": 0.35, "grad_norm": 1.4469096386889326, "learning_rate": 7.571763885213153e-06, "loss": 0.7045, "step": 5440 }, { "epoch": 0.35, "grad_norm": 1.426715887581308, "learning_rate": 7.570874901971347e-06, "loss": 0.6795, "step": 5441 }, { "epoch": 0.35, "grad_norm": 1.6389073935199507, "learning_rate": 7.569985808238739e-06, "loss": 0.7459, "step": 5442 }, { "epoch": 0.35, "grad_norm": 1.5855020603861874, "learning_rate": 7.569096604053543e-06, "loss": 0.7056, "step": 5443 }, { "epoch": 0.35, "grad_norm": 1.615150179440049, "learning_rate": 7.56820728945397e-06, "loss": 0.7377, "step": 5444 }, { "epoch": 0.35, "grad_norm": 1.5301797223652474, "learning_rate": 7.5673178644782475e-06, "loss": 0.7142, "step": 5445 }, { "epoch": 0.35, "grad_norm": 1.5495802304734285, "learning_rate": 7.566428329164598e-06, "loss": 0.7567, "step": 5446 }, { "epoch": 0.35, "grad_norm": 1.654992862613468, "learning_rate": 7.565538683551251e-06, "loss": 0.8031, "step": 5447 }, { "epoch": 0.35, "grad_norm": 1.7737497892176795, "learning_rate": 7.564648927676443e-06, "loss": 0.7581, "step": 5448 }, { "epoch": 0.35, "grad_norm": 1.407130546866074, "learning_rate": 7.563759061578412e-06, "loss": 0.7092, "step": 5449 }, { "epoch": 0.35, "grad_norm": 1.5318790127559783, "learning_rate": 7.562869085295403e-06, "loss": 0.7156, "step": 5450 }, { "epoch": 0.35, "grad_norm": 1.4418984815838862, "learning_rate": 7.561978998865667e-06, "loss": 0.6565, "step": 5451 }, { "epoch": 0.35, "grad_norm": 1.5613137294003054, "learning_rate": 7.561088802327456e-06, "loss": 0.7602, "step": 5452 }, { "epoch": 0.35, "grad_norm": 1.4940777872063122, "learning_rate": 7.560198495719032e-06, "loss": 0.7284, "step": 5453 }, { "epoch": 0.35, "grad_norm": 1.4314350885197664, "learning_rate": 7.5593080790786554e-06, "loss": 0.7176, "step": 5454 }, { "epoch": 0.35, "grad_norm": 1.4597590043309945, "learning_rate": 7.558417552444595e-06, "loss": 0.7062, "step": 5455 }, { "epoch": 0.35, "grad_norm": 1.584103310126017, "learning_rate": 7.557526915855126e-06, "loss": 0.7446, "step": 5456 }, { "epoch": 0.35, "grad_norm": 1.6180948169618619, "learning_rate": 7.556636169348524e-06, "loss": 0.6665, "step": 5457 }, { "epoch": 0.35, "grad_norm": 1.5171984311599134, "learning_rate": 7.555745312963068e-06, "loss": 0.6301, "step": 5458 }, { "epoch": 0.35, "grad_norm": 1.583478259057515, "learning_rate": 7.554854346737053e-06, "loss": 0.7512, "step": 5459 }, { "epoch": 0.35, "grad_norm": 1.1092761243319107, "learning_rate": 7.553963270708764e-06, "loss": 0.726, "step": 5460 }, { "epoch": 0.35, "grad_norm": 1.4015621980272168, "learning_rate": 7.5530720849165e-06, "loss": 0.6296, "step": 5461 }, { "epoch": 0.35, "grad_norm": 1.6333231858882367, "learning_rate": 7.5521807893985645e-06, "loss": 0.7459, "step": 5462 }, { "epoch": 0.35, "grad_norm": 1.3250821064289235, "learning_rate": 7.551289384193261e-06, "loss": 0.7304, "step": 5463 }, { "epoch": 0.35, "grad_norm": 1.401498232270702, "learning_rate": 7.550397869338902e-06, "loss": 0.6385, "step": 5464 }, { "epoch": 0.35, "grad_norm": 1.2067636319564932, "learning_rate": 7.549506244873799e-06, "loss": 0.5935, "step": 5465 }, { "epoch": 0.35, "grad_norm": 1.41020191140338, "learning_rate": 7.548614510836276e-06, "loss": 0.6112, "step": 5466 }, { "epoch": 0.35, "grad_norm": 1.8599262138053425, "learning_rate": 7.547722667264656e-06, "loss": 0.7527, "step": 5467 }, { "epoch": 0.35, "grad_norm": 1.608666013124971, "learning_rate": 7.546830714197271e-06, "loss": 0.7417, "step": 5468 }, { "epoch": 0.35, "grad_norm": 1.585878519943862, "learning_rate": 7.545938651672453e-06, "loss": 0.6872, "step": 5469 }, { "epoch": 0.35, "grad_norm": 1.5604081401877006, "learning_rate": 7.54504647972854e-06, "loss": 0.722, "step": 5470 }, { "epoch": 0.35, "grad_norm": 1.4659118284088093, "learning_rate": 7.5441541984038795e-06, "loss": 0.7405, "step": 5471 }, { "epoch": 0.35, "grad_norm": 1.610366251214804, "learning_rate": 7.543261807736815e-06, "loss": 0.7276, "step": 5472 }, { "epoch": 0.35, "grad_norm": 1.7590256407077358, "learning_rate": 7.542369307765705e-06, "loss": 0.6779, "step": 5473 }, { "epoch": 0.35, "grad_norm": 1.3572579306033, "learning_rate": 7.541476698528902e-06, "loss": 0.6283, "step": 5474 }, { "epoch": 0.35, "grad_norm": 1.6647849806028063, "learning_rate": 7.540583980064772e-06, "loss": 0.688, "step": 5475 }, { "epoch": 0.35, "grad_norm": 1.568479793994202, "learning_rate": 7.539691152411681e-06, "loss": 0.6189, "step": 5476 }, { "epoch": 0.35, "grad_norm": 1.4258251257919017, "learning_rate": 7.538798215608001e-06, "loss": 0.6225, "step": 5477 }, { "epoch": 0.35, "grad_norm": 1.9140315510967016, "learning_rate": 7.537905169692108e-06, "loss": 0.6967, "step": 5478 }, { "epoch": 0.35, "grad_norm": 1.3691635498477743, "learning_rate": 7.537012014702383e-06, "loss": 0.598, "step": 5479 }, { "epoch": 0.35, "grad_norm": 1.6912915057728308, "learning_rate": 7.536118750677212e-06, "loss": 0.6468, "step": 5480 }, { "epoch": 0.35, "grad_norm": 1.6728017240428366, "learning_rate": 7.535225377654989e-06, "loss": 0.742, "step": 5481 }, { "epoch": 0.35, "grad_norm": 1.5305513123990222, "learning_rate": 7.5343318956741054e-06, "loss": 0.8208, "step": 5482 }, { "epoch": 0.35, "grad_norm": 1.546630212384991, "learning_rate": 7.533438304772962e-06, "loss": 0.6944, "step": 5483 }, { "epoch": 0.35, "grad_norm": 1.5838601318343943, "learning_rate": 7.5325446049899646e-06, "loss": 0.7075, "step": 5484 }, { "epoch": 0.35, "grad_norm": 1.336267550000704, "learning_rate": 7.531650796363521e-06, "loss": 0.6837, "step": 5485 }, { "epoch": 0.35, "grad_norm": 1.7951919016292248, "learning_rate": 7.530756878932045e-06, "loss": 0.7603, "step": 5486 }, { "epoch": 0.35, "grad_norm": 1.438727953523329, "learning_rate": 7.529862852733958e-06, "loss": 0.6591, "step": 5487 }, { "epoch": 0.35, "grad_norm": 1.4662367439439243, "learning_rate": 7.528968717807678e-06, "loss": 0.7187, "step": 5488 }, { "epoch": 0.35, "grad_norm": 1.5039443333404194, "learning_rate": 7.52807447419164e-06, "loss": 0.6893, "step": 5489 }, { "epoch": 0.35, "grad_norm": 1.4739087843387955, "learning_rate": 7.527180121924274e-06, "loss": 0.7293, "step": 5490 }, { "epoch": 0.35, "grad_norm": 1.1921506075718096, "learning_rate": 7.526285661044015e-06, "loss": 0.6536, "step": 5491 }, { "epoch": 0.35, "grad_norm": 1.4117849854802997, "learning_rate": 7.525391091589307e-06, "loss": 0.611, "step": 5492 }, { "epoch": 0.35, "grad_norm": 1.395734791916068, "learning_rate": 7.524496413598596e-06, "loss": 0.6261, "step": 5493 }, { "epoch": 0.35, "grad_norm": 1.4608744249255936, "learning_rate": 7.523601627110333e-06, "loss": 0.5993, "step": 5494 }, { "epoch": 0.35, "grad_norm": 2.3172784674751155, "learning_rate": 7.522706732162976e-06, "loss": 0.6979, "step": 5495 }, { "epoch": 0.35, "grad_norm": 1.711601958070632, "learning_rate": 7.521811728794985e-06, "loss": 0.7577, "step": 5496 }, { "epoch": 0.35, "grad_norm": 1.442998670804171, "learning_rate": 7.520916617044826e-06, "loss": 0.6502, "step": 5497 }, { "epoch": 0.35, "grad_norm": 1.4358218398212748, "learning_rate": 7.520021396950965e-06, "loss": 0.7961, "step": 5498 }, { "epoch": 0.35, "grad_norm": 1.4896261857052864, "learning_rate": 7.519126068551882e-06, "loss": 0.7091, "step": 5499 }, { "epoch": 0.35, "grad_norm": 1.4171454767646585, "learning_rate": 7.518230631886054e-06, "loss": 0.7152, "step": 5500 }, { "epoch": 0.35, "grad_norm": 1.5358725682293703, "learning_rate": 7.517335086991963e-06, "loss": 0.735, "step": 5501 }, { "epoch": 0.35, "grad_norm": 1.4103699266933754, "learning_rate": 7.5164394339081006e-06, "loss": 0.7361, "step": 5502 }, { "epoch": 0.35, "grad_norm": 1.4470949895142615, "learning_rate": 7.51554367267296e-06, "loss": 0.7235, "step": 5503 }, { "epoch": 0.35, "grad_norm": 1.4218955476871302, "learning_rate": 7.514647803325038e-06, "loss": 0.6863, "step": 5504 }, { "epoch": 0.35, "grad_norm": 1.5620319182179498, "learning_rate": 7.513751825902837e-06, "loss": 0.729, "step": 5505 }, { "epoch": 0.35, "grad_norm": 1.7221611261463818, "learning_rate": 7.512855740444865e-06, "loss": 0.7271, "step": 5506 }, { "epoch": 0.35, "grad_norm": 1.6738180152947908, "learning_rate": 7.5119595469896325e-06, "loss": 0.667, "step": 5507 }, { "epoch": 0.35, "grad_norm": 1.548676510865462, "learning_rate": 7.511063245575656e-06, "loss": 0.7791, "step": 5508 }, { "epoch": 0.35, "grad_norm": 1.2141909222921152, "learning_rate": 7.51016683624146e-06, "loss": 0.6432, "step": 5509 }, { "epoch": 0.35, "grad_norm": 1.3384214963341856, "learning_rate": 7.5092703190255675e-06, "loss": 0.5908, "step": 5510 }, { "epoch": 0.35, "grad_norm": 1.475851820946528, "learning_rate": 7.50837369396651e-06, "loss": 0.6589, "step": 5511 }, { "epoch": 0.35, "grad_norm": 1.7356803099868063, "learning_rate": 7.507476961102822e-06, "loss": 0.7663, "step": 5512 }, { "epoch": 0.35, "grad_norm": 1.5585112059027744, "learning_rate": 7.506580120473042e-06, "loss": 0.6754, "step": 5513 }, { "epoch": 0.35, "grad_norm": 1.0279077296447745, "learning_rate": 7.505683172115714e-06, "loss": 0.6901, "step": 5514 }, { "epoch": 0.35, "grad_norm": 1.6771202524637348, "learning_rate": 7.504786116069391e-06, "loss": 0.7468, "step": 5515 }, { "epoch": 0.35, "grad_norm": 1.4363227872280377, "learning_rate": 7.5038889523726225e-06, "loss": 0.7343, "step": 5516 }, { "epoch": 0.35, "grad_norm": 1.4776318428732451, "learning_rate": 7.502991681063969e-06, "loss": 0.6988, "step": 5517 }, { "epoch": 0.35, "grad_norm": 1.6359794383634405, "learning_rate": 7.502094302181992e-06, "loss": 0.7224, "step": 5518 }, { "epoch": 0.35, "grad_norm": 1.5958319143604736, "learning_rate": 7.5011968157652615e-06, "loss": 0.5983, "step": 5519 }, { "epoch": 0.35, "grad_norm": 1.5506121518319727, "learning_rate": 7.500299221852346e-06, "loss": 0.7525, "step": 5520 }, { "epoch": 0.35, "grad_norm": 1.547783084198684, "learning_rate": 7.499401520481824e-06, "loss": 0.7137, "step": 5521 }, { "epoch": 0.35, "grad_norm": 1.062283641715782, "learning_rate": 7.498503711692276e-06, "loss": 0.6576, "step": 5522 }, { "epoch": 0.35, "grad_norm": 1.4514199388297129, "learning_rate": 7.497605795522289e-06, "loss": 0.7815, "step": 5523 }, { "epoch": 0.35, "grad_norm": 1.7234422299239653, "learning_rate": 7.4967077720104545e-06, "loss": 0.6646, "step": 5524 }, { "epoch": 0.35, "grad_norm": 1.660149923176921, "learning_rate": 7.495809641195364e-06, "loss": 0.7433, "step": 5525 }, { "epoch": 0.35, "grad_norm": 1.4292973695441895, "learning_rate": 7.49491140311562e-06, "loss": 0.6412, "step": 5526 }, { "epoch": 0.35, "grad_norm": 0.9976512296961587, "learning_rate": 7.494013057809827e-06, "loss": 0.6299, "step": 5527 }, { "epoch": 0.35, "grad_norm": 1.4226231974345476, "learning_rate": 7.493114605316594e-06, "loss": 0.7584, "step": 5528 }, { "epoch": 0.35, "grad_norm": 1.5827281711537982, "learning_rate": 7.492216045674532e-06, "loss": 0.6858, "step": 5529 }, { "epoch": 0.35, "grad_norm": 1.419680266037333, "learning_rate": 7.491317378922263e-06, "loss": 0.6911, "step": 5530 }, { "epoch": 0.35, "grad_norm": 1.4797690440919824, "learning_rate": 7.490418605098408e-06, "loss": 0.6489, "step": 5531 }, { "epoch": 0.35, "grad_norm": 1.3310676765783047, "learning_rate": 7.489519724241594e-06, "loss": 0.7239, "step": 5532 }, { "epoch": 0.35, "grad_norm": 1.2821585626497607, "learning_rate": 7.488620736390454e-06, "loss": 0.7092, "step": 5533 }, { "epoch": 0.35, "grad_norm": 1.5453177937825562, "learning_rate": 7.487721641583624e-06, "loss": 0.698, "step": 5534 }, { "epoch": 0.35, "grad_norm": 1.0356467417986563, "learning_rate": 7.486822439859744e-06, "loss": 0.7203, "step": 5535 }, { "epoch": 0.35, "grad_norm": 1.5745679534467287, "learning_rate": 7.485923131257462e-06, "loss": 0.6818, "step": 5536 }, { "epoch": 0.35, "grad_norm": 1.3905533675499202, "learning_rate": 7.485023715815427e-06, "loss": 0.6472, "step": 5537 }, { "epoch": 0.35, "grad_norm": 1.4093177501832344, "learning_rate": 7.484124193572295e-06, "loss": 0.6451, "step": 5538 }, { "epoch": 0.35, "grad_norm": 1.4137409140245856, "learning_rate": 7.483224564566725e-06, "loss": 0.6302, "step": 5539 }, { "epoch": 0.35, "grad_norm": 1.34088268140079, "learning_rate": 7.482324828837382e-06, "loss": 0.7395, "step": 5540 }, { "epoch": 0.35, "grad_norm": 1.89351789516838, "learning_rate": 7.481424986422933e-06, "loss": 0.7763, "step": 5541 }, { "epoch": 0.35, "grad_norm": 1.5614913895005849, "learning_rate": 7.480525037362052e-06, "loss": 0.695, "step": 5542 }, { "epoch": 0.35, "grad_norm": 1.6290620065495995, "learning_rate": 7.479624981693416e-06, "loss": 0.7817, "step": 5543 }, { "epoch": 0.35, "grad_norm": 1.7035183050030025, "learning_rate": 7.478724819455709e-06, "loss": 0.7975, "step": 5544 }, { "epoch": 0.35, "grad_norm": 1.3486922528380023, "learning_rate": 7.47782455068762e-06, "loss": 0.7104, "step": 5545 }, { "epoch": 0.35, "grad_norm": 1.7922613410836015, "learning_rate": 7.476924175427838e-06, "loss": 0.7313, "step": 5546 }, { "epoch": 0.36, "grad_norm": 1.439284149215755, "learning_rate": 7.476023693715059e-06, "loss": 0.7649, "step": 5547 }, { "epoch": 0.36, "grad_norm": 1.0752523339813278, "learning_rate": 7.475123105587985e-06, "loss": 0.6821, "step": 5548 }, { "epoch": 0.36, "grad_norm": 1.524375008820587, "learning_rate": 7.47422241108532e-06, "loss": 0.6624, "step": 5549 }, { "epoch": 0.36, "grad_norm": 1.4878956761962365, "learning_rate": 7.473321610245774e-06, "loss": 0.6734, "step": 5550 }, { "epoch": 0.36, "grad_norm": 1.4827045229475717, "learning_rate": 7.4724207031080645e-06, "loss": 0.6916, "step": 5551 }, { "epoch": 0.36, "grad_norm": 1.4408628156153955, "learning_rate": 7.471519689710908e-06, "loss": 0.7066, "step": 5552 }, { "epoch": 0.36, "grad_norm": 1.5760016673963488, "learning_rate": 7.4706185700930266e-06, "loss": 0.7082, "step": 5553 }, { "epoch": 0.36, "grad_norm": 1.1238303189845196, "learning_rate": 7.469717344293152e-06, "loss": 0.6424, "step": 5554 }, { "epoch": 0.36, "grad_norm": 1.5041797788702653, "learning_rate": 7.4688160123500165e-06, "loss": 0.7067, "step": 5555 }, { "epoch": 0.36, "grad_norm": 0.9737685030683345, "learning_rate": 7.467914574302356e-06, "loss": 0.606, "step": 5556 }, { "epoch": 0.36, "grad_norm": 1.1766559462474475, "learning_rate": 7.467013030188912e-06, "loss": 0.6586, "step": 5557 }, { "epoch": 0.36, "grad_norm": 1.1361026916423884, "learning_rate": 7.466111380048432e-06, "loss": 0.5126, "step": 5558 }, { "epoch": 0.36, "grad_norm": 1.4201964694880658, "learning_rate": 7.465209623919668e-06, "loss": 0.6928, "step": 5559 }, { "epoch": 0.36, "grad_norm": 1.6450296671767526, "learning_rate": 7.464307761841374e-06, "loss": 0.6466, "step": 5560 }, { "epoch": 0.36, "grad_norm": 1.6008620580611117, "learning_rate": 7.46340579385231e-06, "loss": 0.7612, "step": 5561 }, { "epoch": 0.36, "grad_norm": 1.4495626401639405, "learning_rate": 7.462503719991241e-06, "loss": 0.7226, "step": 5562 }, { "epoch": 0.36, "grad_norm": 1.4588927800220157, "learning_rate": 7.4616015402969375e-06, "loss": 0.7514, "step": 5563 }, { "epoch": 0.36, "grad_norm": 1.3999568457195937, "learning_rate": 7.460699254808172e-06, "loss": 0.7196, "step": 5564 }, { "epoch": 0.36, "grad_norm": 1.642357974413348, "learning_rate": 7.459796863563723e-06, "loss": 0.7221, "step": 5565 }, { "epoch": 0.36, "grad_norm": 1.4551804258382692, "learning_rate": 7.458894366602374e-06, "loss": 0.6845, "step": 5566 }, { "epoch": 0.36, "grad_norm": 1.3675520379251396, "learning_rate": 7.457991763962912e-06, "loss": 0.6537, "step": 5567 }, { "epoch": 0.36, "grad_norm": 1.5223508932229914, "learning_rate": 7.457089055684129e-06, "loss": 0.717, "step": 5568 }, { "epoch": 0.36, "grad_norm": 1.3163543536549542, "learning_rate": 7.456186241804821e-06, "loss": 0.7146, "step": 5569 }, { "epoch": 0.36, "grad_norm": 1.4052523771173882, "learning_rate": 7.45528332236379e-06, "loss": 0.6603, "step": 5570 }, { "epoch": 0.36, "grad_norm": 1.786959604475064, "learning_rate": 7.45438029739984e-06, "loss": 0.7366, "step": 5571 }, { "epoch": 0.36, "grad_norm": 1.5403555090767336, "learning_rate": 7.453477166951783e-06, "loss": 0.7695, "step": 5572 }, { "epoch": 0.36, "grad_norm": 1.5606614956320188, "learning_rate": 7.4525739310584326e-06, "loss": 0.5973, "step": 5573 }, { "epoch": 0.36, "grad_norm": 1.7014971738357554, "learning_rate": 7.451670589758609e-06, "loss": 0.6658, "step": 5574 }, { "epoch": 0.36, "grad_norm": 1.431853772033403, "learning_rate": 7.450767143091133e-06, "loss": 0.6496, "step": 5575 }, { "epoch": 0.36, "grad_norm": 1.5601074642117858, "learning_rate": 7.4498635910948365e-06, "loss": 0.7558, "step": 5576 }, { "epoch": 0.36, "grad_norm": 2.5729518509107017, "learning_rate": 7.448959933808552e-06, "loss": 0.6611, "step": 5577 }, { "epoch": 0.36, "grad_norm": 1.3526225719737373, "learning_rate": 7.448056171271114e-06, "loss": 0.7452, "step": 5578 }, { "epoch": 0.36, "grad_norm": 1.8167794981555088, "learning_rate": 7.447152303521366e-06, "loss": 0.7957, "step": 5579 }, { "epoch": 0.36, "grad_norm": 2.123418453375821, "learning_rate": 7.446248330598154e-06, "loss": 0.6443, "step": 5580 }, { "epoch": 0.36, "grad_norm": 1.5133446014092544, "learning_rate": 7.445344252540327e-06, "loss": 0.7065, "step": 5581 }, { "epoch": 0.36, "grad_norm": 1.8289780301847127, "learning_rate": 7.444440069386746e-06, "loss": 0.8005, "step": 5582 }, { "epoch": 0.36, "grad_norm": 1.6112896367589926, "learning_rate": 7.443535781176265e-06, "loss": 0.6915, "step": 5583 }, { "epoch": 0.36, "grad_norm": 1.386948533679249, "learning_rate": 7.442631387947753e-06, "loss": 0.7001, "step": 5584 }, { "epoch": 0.36, "grad_norm": 1.7185249370544116, "learning_rate": 7.441726889740075e-06, "loss": 0.706, "step": 5585 }, { "epoch": 0.36, "grad_norm": 1.7777753249499795, "learning_rate": 7.440822286592105e-06, "loss": 0.7218, "step": 5586 }, { "epoch": 0.36, "grad_norm": 1.5600538238501902, "learning_rate": 7.439917578542723e-06, "loss": 0.6912, "step": 5587 }, { "epoch": 0.36, "grad_norm": 1.5600445087409924, "learning_rate": 7.4390127656308116e-06, "loss": 0.6859, "step": 5588 }, { "epoch": 0.36, "grad_norm": 1.915789834406451, "learning_rate": 7.438107847895256e-06, "loss": 0.7433, "step": 5589 }, { "epoch": 0.36, "grad_norm": 1.737127529357273, "learning_rate": 7.437202825374948e-06, "loss": 0.7734, "step": 5590 }, { "epoch": 0.36, "grad_norm": 1.5833313976208299, "learning_rate": 7.436297698108783e-06, "loss": 0.8026, "step": 5591 }, { "epoch": 0.36, "grad_norm": 1.4537462152047074, "learning_rate": 7.435392466135665e-06, "loss": 0.7033, "step": 5592 }, { "epoch": 0.36, "grad_norm": 1.5303040781060848, "learning_rate": 7.434487129494494e-06, "loss": 0.599, "step": 5593 }, { "epoch": 0.36, "grad_norm": 1.5227662065592904, "learning_rate": 7.433581688224181e-06, "loss": 0.7308, "step": 5594 }, { "epoch": 0.36, "grad_norm": 1.4516627142883418, "learning_rate": 7.4326761423636415e-06, "loss": 0.7226, "step": 5595 }, { "epoch": 0.36, "grad_norm": 1.721403296656722, "learning_rate": 7.431770491951794e-06, "loss": 0.8009, "step": 5596 }, { "epoch": 0.36, "grad_norm": 1.4563038899760332, "learning_rate": 7.430864737027561e-06, "loss": 0.6176, "step": 5597 }, { "epoch": 0.36, "grad_norm": 1.5854248697811943, "learning_rate": 7.429958877629868e-06, "loss": 0.7347, "step": 5598 }, { "epoch": 0.36, "grad_norm": 1.72208081617444, "learning_rate": 7.429052913797649e-06, "loss": 0.7162, "step": 5599 }, { "epoch": 0.36, "grad_norm": 1.2582267831597118, "learning_rate": 7.42814684556984e-06, "loss": 0.5776, "step": 5600 }, { "epoch": 0.36, "grad_norm": 1.5745161588363843, "learning_rate": 7.4272406729853805e-06, "loss": 0.6617, "step": 5601 }, { "epoch": 0.36, "grad_norm": 1.1620138288731183, "learning_rate": 7.426334396083219e-06, "loss": 0.6887, "step": 5602 }, { "epoch": 0.36, "grad_norm": 1.9026025256851018, "learning_rate": 7.425428014902304e-06, "loss": 0.7545, "step": 5603 }, { "epoch": 0.36, "grad_norm": 1.5634659688381527, "learning_rate": 7.4245215294815885e-06, "loss": 0.7851, "step": 5604 }, { "epoch": 0.36, "grad_norm": 1.3661045076368383, "learning_rate": 7.423614939860033e-06, "loss": 0.6969, "step": 5605 }, { "epoch": 0.36, "grad_norm": 1.3092781622405354, "learning_rate": 7.4227082460765995e-06, "loss": 0.6622, "step": 5606 }, { "epoch": 0.36, "grad_norm": 1.7745480867914116, "learning_rate": 7.421801448170256e-06, "loss": 0.7527, "step": 5607 }, { "epoch": 0.36, "grad_norm": 1.6084894920380899, "learning_rate": 7.420894546179975e-06, "loss": 0.6883, "step": 5608 }, { "epoch": 0.36, "grad_norm": 1.5393082907837008, "learning_rate": 7.419987540144733e-06, "loss": 0.6667, "step": 5609 }, { "epoch": 0.36, "grad_norm": 1.495104158384519, "learning_rate": 7.4190804301035136e-06, "loss": 0.705, "step": 5610 }, { "epoch": 0.36, "grad_norm": 1.7554242664418518, "learning_rate": 7.4181732160952995e-06, "loss": 0.7152, "step": 5611 }, { "epoch": 0.36, "grad_norm": 1.7229542487845084, "learning_rate": 7.417265898159082e-06, "loss": 0.7769, "step": 5612 }, { "epoch": 0.36, "grad_norm": 1.505145106132622, "learning_rate": 7.416358476333856e-06, "loss": 0.7401, "step": 5613 }, { "epoch": 0.36, "grad_norm": 1.4418316667574278, "learning_rate": 7.4154509506586195e-06, "loss": 0.6581, "step": 5614 }, { "epoch": 0.36, "grad_norm": 1.6977578877152866, "learning_rate": 7.414543321172376e-06, "loss": 0.713, "step": 5615 }, { "epoch": 0.36, "grad_norm": 1.6611551421333495, "learning_rate": 7.413635587914134e-06, "loss": 0.7634, "step": 5616 }, { "epoch": 0.36, "grad_norm": 1.5461315718189292, "learning_rate": 7.412727750922908e-06, "loss": 0.6549, "step": 5617 }, { "epoch": 0.36, "grad_norm": 1.512889330870845, "learning_rate": 7.411819810237712e-06, "loss": 0.6367, "step": 5618 }, { "epoch": 0.36, "grad_norm": 1.5182697202171598, "learning_rate": 7.4109117658975685e-06, "loss": 0.7119, "step": 5619 }, { "epoch": 0.36, "grad_norm": 1.3661148629365494, "learning_rate": 7.4100036179415035e-06, "loss": 0.672, "step": 5620 }, { "epoch": 0.36, "grad_norm": 1.4941519150375573, "learning_rate": 7.409095366408547e-06, "loss": 0.729, "step": 5621 }, { "epoch": 0.36, "grad_norm": 1.4312769589936285, "learning_rate": 7.408187011337734e-06, "loss": 0.7332, "step": 5622 }, { "epoch": 0.36, "grad_norm": 1.429024401385058, "learning_rate": 7.407278552768103e-06, "loss": 0.6868, "step": 5623 }, { "epoch": 0.36, "grad_norm": 1.1504258777575447, "learning_rate": 7.4063699907387e-06, "loss": 0.6481, "step": 5624 }, { "epoch": 0.36, "grad_norm": 1.4836608395546897, "learning_rate": 7.40546132528857e-06, "loss": 0.6442, "step": 5625 }, { "epoch": 0.36, "grad_norm": 1.5078906227962299, "learning_rate": 7.404552556456768e-06, "loss": 0.6969, "step": 5626 }, { "epoch": 0.36, "grad_norm": 1.6011815317232012, "learning_rate": 7.403643684282347e-06, "loss": 0.6906, "step": 5627 }, { "epoch": 0.36, "grad_norm": 1.6134121212796864, "learning_rate": 7.402734708804373e-06, "loss": 0.6956, "step": 5628 }, { "epoch": 0.36, "grad_norm": 1.628796384371602, "learning_rate": 7.40182563006191e-06, "loss": 0.6843, "step": 5629 }, { "epoch": 0.36, "grad_norm": 1.4452207557896493, "learning_rate": 7.4009164480940275e-06, "loss": 0.7054, "step": 5630 }, { "epoch": 0.36, "grad_norm": 2.12984791218447, "learning_rate": 7.4000071629398015e-06, "loss": 0.7609, "step": 5631 }, { "epoch": 0.36, "grad_norm": 1.1473655553864588, "learning_rate": 7.399097774638312e-06, "loss": 0.6894, "step": 5632 }, { "epoch": 0.36, "grad_norm": 1.5913902454387747, "learning_rate": 7.398188283228641e-06, "loss": 0.6912, "step": 5633 }, { "epoch": 0.36, "grad_norm": 1.4944013898709174, "learning_rate": 7.397278688749876e-06, "loss": 0.6649, "step": 5634 }, { "epoch": 0.36, "grad_norm": 0.9991648759850588, "learning_rate": 7.39636899124111e-06, "loss": 0.5742, "step": 5635 }, { "epoch": 0.36, "grad_norm": 1.4730353498015887, "learning_rate": 7.395459190741441e-06, "loss": 0.7517, "step": 5636 }, { "epoch": 0.36, "grad_norm": 1.5734057863410795, "learning_rate": 7.39454928728997e-06, "loss": 0.6944, "step": 5637 }, { "epoch": 0.36, "grad_norm": 1.7332471287976254, "learning_rate": 7.3936392809258e-06, "loss": 0.7017, "step": 5638 }, { "epoch": 0.36, "grad_norm": 1.1302998252746377, "learning_rate": 7.392729171688047e-06, "loss": 0.6421, "step": 5639 }, { "epoch": 0.36, "grad_norm": 1.4127544787855328, "learning_rate": 7.39181895961582e-06, "loss": 0.6856, "step": 5640 }, { "epoch": 0.36, "grad_norm": 1.3255176225736927, "learning_rate": 7.39090864474824e-06, "loss": 0.6644, "step": 5641 }, { "epoch": 0.36, "grad_norm": 1.303060338075856, "learning_rate": 7.389998227124431e-06, "loss": 0.6157, "step": 5642 }, { "epoch": 0.36, "grad_norm": 1.6055763010710675, "learning_rate": 7.389087706783518e-06, "loss": 0.7493, "step": 5643 }, { "epoch": 0.36, "grad_norm": 1.4524620426160428, "learning_rate": 7.3881770837646385e-06, "loss": 0.7004, "step": 5644 }, { "epoch": 0.36, "grad_norm": 1.0982388009777198, "learning_rate": 7.387266358106925e-06, "loss": 0.7462, "step": 5645 }, { "epoch": 0.36, "grad_norm": 3.5137841588209384, "learning_rate": 7.386355529849519e-06, "loss": 0.7016, "step": 5646 }, { "epoch": 0.36, "grad_norm": 1.7076577209375265, "learning_rate": 7.385444599031568e-06, "loss": 0.7719, "step": 5647 }, { "epoch": 0.36, "grad_norm": 1.4466369073566618, "learning_rate": 7.38453356569222e-06, "loss": 0.6974, "step": 5648 }, { "epoch": 0.36, "grad_norm": 1.5213386511987805, "learning_rate": 7.38362242987063e-06, "loss": 0.6973, "step": 5649 }, { "epoch": 0.36, "grad_norm": 1.5052700851912568, "learning_rate": 7.382711191605958e-06, "loss": 0.6467, "step": 5650 }, { "epoch": 0.36, "grad_norm": 1.4626356168849464, "learning_rate": 7.381799850937363e-06, "loss": 0.6726, "step": 5651 }, { "epoch": 0.36, "grad_norm": 1.4902748417787202, "learning_rate": 7.380888407904018e-06, "loss": 0.7224, "step": 5652 }, { "epoch": 0.36, "grad_norm": 1.4821972494174438, "learning_rate": 7.379976862545091e-06, "loss": 0.7375, "step": 5653 }, { "epoch": 0.36, "grad_norm": 1.5118409024142583, "learning_rate": 7.3790652148997595e-06, "loss": 0.7732, "step": 5654 }, { "epoch": 0.36, "grad_norm": 1.9350308403255791, "learning_rate": 7.378153465007203e-06, "loss": 0.6566, "step": 5655 }, { "epoch": 0.36, "grad_norm": 1.310550614650726, "learning_rate": 7.377241612906609e-06, "loss": 0.6648, "step": 5656 }, { "epoch": 0.36, "grad_norm": 2.2810970674339415, "learning_rate": 7.376329658637167e-06, "loss": 0.6779, "step": 5657 }, { "epoch": 0.36, "grad_norm": 1.6516332920448165, "learning_rate": 7.375417602238066e-06, "loss": 0.6554, "step": 5658 }, { "epoch": 0.36, "grad_norm": 1.7288530010546228, "learning_rate": 7.374505443748512e-06, "loss": 0.6731, "step": 5659 }, { "epoch": 0.36, "grad_norm": 1.639771014778545, "learning_rate": 7.373593183207701e-06, "loss": 0.6779, "step": 5660 }, { "epoch": 0.36, "grad_norm": 1.7145703238132777, "learning_rate": 7.372680820654844e-06, "loss": 0.7489, "step": 5661 }, { "epoch": 0.36, "grad_norm": 1.6148195046519571, "learning_rate": 7.37176835612915e-06, "loss": 0.7787, "step": 5662 }, { "epoch": 0.36, "grad_norm": 1.5209431683549113, "learning_rate": 7.3708557896698375e-06, "loss": 0.6326, "step": 5663 }, { "epoch": 0.36, "grad_norm": 1.4257437643504476, "learning_rate": 7.369943121316122e-06, "loss": 0.7009, "step": 5664 }, { "epoch": 0.36, "grad_norm": 1.4524675950513481, "learning_rate": 7.3690303511072324e-06, "loss": 0.6363, "step": 5665 }, { "epoch": 0.36, "grad_norm": 1.2794903797130122, "learning_rate": 7.368117479082397e-06, "loss": 0.679, "step": 5666 }, { "epoch": 0.36, "grad_norm": 1.7262517697665816, "learning_rate": 7.367204505280848e-06, "loss": 0.6113, "step": 5667 }, { "epoch": 0.36, "grad_norm": 1.5841762301530509, "learning_rate": 7.366291429741824e-06, "loss": 0.7101, "step": 5668 }, { "epoch": 0.36, "grad_norm": 1.80077718067855, "learning_rate": 7.365378252504567e-06, "loss": 0.7106, "step": 5669 }, { "epoch": 0.36, "grad_norm": 1.3544930183986241, "learning_rate": 7.3644649736083216e-06, "loss": 0.6226, "step": 5670 }, { "epoch": 0.36, "grad_norm": 2.2532270211924033, "learning_rate": 7.363551593092342e-06, "loss": 0.7709, "step": 5671 }, { "epoch": 0.36, "grad_norm": 1.6587587156964045, "learning_rate": 7.362638110995879e-06, "loss": 0.7033, "step": 5672 }, { "epoch": 0.36, "grad_norm": 1.7221358947081702, "learning_rate": 7.361724527358195e-06, "loss": 0.7329, "step": 5673 }, { "epoch": 0.36, "grad_norm": 1.4989989393308492, "learning_rate": 7.360810842218554e-06, "loss": 0.6355, "step": 5674 }, { "epoch": 0.36, "grad_norm": 1.5510628097071946, "learning_rate": 7.359897055616225e-06, "loss": 0.7651, "step": 5675 }, { "epoch": 0.36, "grad_norm": 1.4019125378226815, "learning_rate": 7.358983167590479e-06, "loss": 0.7518, "step": 5676 }, { "epoch": 0.36, "grad_norm": 1.4212520810704181, "learning_rate": 7.358069178180592e-06, "loss": 0.687, "step": 5677 }, { "epoch": 0.36, "grad_norm": 1.5313219854581301, "learning_rate": 7.357155087425848e-06, "loss": 0.5971, "step": 5678 }, { "epoch": 0.36, "grad_norm": 1.4819215555291487, "learning_rate": 7.356240895365531e-06, "loss": 0.7598, "step": 5679 }, { "epoch": 0.36, "grad_norm": 1.4819873206156844, "learning_rate": 7.35532660203893e-06, "loss": 0.6396, "step": 5680 }, { "epoch": 0.36, "grad_norm": 1.5882215957848842, "learning_rate": 7.354412207485343e-06, "loss": 0.7704, "step": 5681 }, { "epoch": 0.36, "grad_norm": 1.5277518650631967, "learning_rate": 7.353497711744067e-06, "loss": 0.6748, "step": 5682 }, { "epoch": 0.36, "grad_norm": 1.6769128244638423, "learning_rate": 7.352583114854402e-06, "loss": 0.802, "step": 5683 }, { "epoch": 0.36, "grad_norm": 1.457290250440971, "learning_rate": 7.351668416855659e-06, "loss": 0.6627, "step": 5684 }, { "epoch": 0.36, "grad_norm": 1.674017366608452, "learning_rate": 7.350753617787151e-06, "loss": 0.7054, "step": 5685 }, { "epoch": 0.36, "grad_norm": 1.5459927121980934, "learning_rate": 7.349838717688191e-06, "loss": 0.7061, "step": 5686 }, { "epoch": 0.36, "grad_norm": 7.916095632486424, "learning_rate": 7.348923716598099e-06, "loss": 0.735, "step": 5687 }, { "epoch": 0.36, "grad_norm": 1.3738058285537358, "learning_rate": 7.348008614556203e-06, "loss": 0.6887, "step": 5688 }, { "epoch": 0.36, "grad_norm": 1.4191493442205456, "learning_rate": 7.347093411601831e-06, "loss": 0.6212, "step": 5689 }, { "epoch": 0.36, "grad_norm": 1.5418634335924908, "learning_rate": 7.346178107774316e-06, "loss": 0.7261, "step": 5690 }, { "epoch": 0.36, "grad_norm": 1.6011195344436073, "learning_rate": 7.3452627031129964e-06, "loss": 0.6849, "step": 5691 }, { "epoch": 0.36, "grad_norm": 1.690411457912809, "learning_rate": 7.344347197657212e-06, "loss": 0.7233, "step": 5692 }, { "epoch": 0.36, "grad_norm": 1.6087267230708906, "learning_rate": 7.3434315914463125e-06, "loss": 0.6829, "step": 5693 }, { "epoch": 0.36, "grad_norm": 1.6177155924389215, "learning_rate": 7.342515884519646e-06, "loss": 0.8043, "step": 5694 }, { "epoch": 0.36, "grad_norm": 2.3459300569054395, "learning_rate": 7.341600076916571e-06, "loss": 0.6792, "step": 5695 }, { "epoch": 0.36, "grad_norm": 0.9953168216993824, "learning_rate": 7.340684168676444e-06, "loss": 0.6438, "step": 5696 }, { "epoch": 0.36, "grad_norm": 1.629975517470324, "learning_rate": 7.33976815983863e-06, "loss": 0.7772, "step": 5697 }, { "epoch": 0.36, "grad_norm": 1.7487454941788987, "learning_rate": 7.338852050442497e-06, "loss": 0.6517, "step": 5698 }, { "epoch": 0.36, "grad_norm": 1.5858958164838517, "learning_rate": 7.3379358405274195e-06, "loss": 0.6535, "step": 5699 }, { "epoch": 0.36, "grad_norm": 1.5407750732288112, "learning_rate": 7.33701953013277e-06, "loss": 0.6996, "step": 5700 }, { "epoch": 0.36, "grad_norm": 1.256181679937294, "learning_rate": 7.33610311929793e-06, "loss": 0.7089, "step": 5701 }, { "epoch": 0.36, "grad_norm": 1.539636681977522, "learning_rate": 7.33518660806229e-06, "loss": 0.6869, "step": 5702 }, { "epoch": 0.37, "grad_norm": 1.6500232417271676, "learning_rate": 7.334269996465236e-06, "loss": 0.6432, "step": 5703 }, { "epoch": 0.37, "grad_norm": 1.5517682525514886, "learning_rate": 7.333353284546162e-06, "loss": 0.7591, "step": 5704 }, { "epoch": 0.37, "grad_norm": 1.6446670204490006, "learning_rate": 7.332436472344468e-06, "loss": 0.7407, "step": 5705 }, { "epoch": 0.37, "grad_norm": 1.2808904318010443, "learning_rate": 7.331519559899554e-06, "loss": 0.7752, "step": 5706 }, { "epoch": 0.37, "grad_norm": 1.5671458893538834, "learning_rate": 7.330602547250828e-06, "loss": 0.6269, "step": 5707 }, { "epoch": 0.37, "grad_norm": 1.5446481580518947, "learning_rate": 7.329685434437703e-06, "loss": 0.7122, "step": 5708 }, { "epoch": 0.37, "grad_norm": 1.2969230646293033, "learning_rate": 7.3287682214995934e-06, "loss": 0.5959, "step": 5709 }, { "epoch": 0.37, "grad_norm": 1.516510753952357, "learning_rate": 7.327850908475919e-06, "loss": 0.6952, "step": 5710 }, { "epoch": 0.37, "grad_norm": 1.7672421163464112, "learning_rate": 7.326933495406103e-06, "loss": 0.8083, "step": 5711 }, { "epoch": 0.37, "grad_norm": 1.4491294314228058, "learning_rate": 7.326015982329576e-06, "loss": 0.6621, "step": 5712 }, { "epoch": 0.37, "grad_norm": 1.6318351003954847, "learning_rate": 7.32509836928577e-06, "loss": 0.6224, "step": 5713 }, { "epoch": 0.37, "grad_norm": 1.5448589226062124, "learning_rate": 7.3241806563141216e-06, "loss": 0.6443, "step": 5714 }, { "epoch": 0.37, "grad_norm": 1.6689435705809932, "learning_rate": 7.323262843454071e-06, "loss": 0.6229, "step": 5715 }, { "epoch": 0.37, "grad_norm": 1.5070075433348924, "learning_rate": 7.322344930745067e-06, "loss": 0.6694, "step": 5716 }, { "epoch": 0.37, "grad_norm": 1.63020610862663, "learning_rate": 7.321426918226557e-06, "loss": 0.6905, "step": 5717 }, { "epoch": 0.37, "grad_norm": 1.3059603625320868, "learning_rate": 7.320508805937996e-06, "loss": 0.6381, "step": 5718 }, { "epoch": 0.37, "grad_norm": 1.7138652827753795, "learning_rate": 7.319590593918844e-06, "loss": 0.749, "step": 5719 }, { "epoch": 0.37, "grad_norm": 1.1058926966523253, "learning_rate": 7.31867228220856e-06, "loss": 0.6696, "step": 5720 }, { "epoch": 0.37, "grad_norm": 1.5634534622491694, "learning_rate": 7.317753870846615e-06, "loss": 0.6387, "step": 5721 }, { "epoch": 0.37, "grad_norm": 1.4970788261690526, "learning_rate": 7.316835359872477e-06, "loss": 0.6299, "step": 5722 }, { "epoch": 0.37, "grad_norm": 1.2580370823847413, "learning_rate": 7.315916749325626e-06, "loss": 0.81, "step": 5723 }, { "epoch": 0.37, "grad_norm": 1.660050718040377, "learning_rate": 7.314998039245539e-06, "loss": 0.8338, "step": 5724 }, { "epoch": 0.37, "grad_norm": 1.7966408381774475, "learning_rate": 7.3140792296717004e-06, "loss": 0.6563, "step": 5725 }, { "epoch": 0.37, "grad_norm": 1.0585791725894966, "learning_rate": 7.3131603206436e-06, "loss": 0.6615, "step": 5726 }, { "epoch": 0.37, "grad_norm": 1.362085762225849, "learning_rate": 7.312241312200727e-06, "loss": 0.6941, "step": 5727 }, { "epoch": 0.37, "grad_norm": 1.5011892154149724, "learning_rate": 7.311322204382583e-06, "loss": 0.6289, "step": 5728 }, { "epoch": 0.37, "grad_norm": 1.8074442543298406, "learning_rate": 7.310402997228667e-06, "loss": 0.818, "step": 5729 }, { "epoch": 0.37, "grad_norm": 1.3637052016604843, "learning_rate": 7.309483690778485e-06, "loss": 0.6277, "step": 5730 }, { "epoch": 0.37, "grad_norm": 1.6970775156490385, "learning_rate": 7.308564285071547e-06, "loss": 0.7313, "step": 5731 }, { "epoch": 0.37, "grad_norm": 1.5520532089833727, "learning_rate": 7.307644780147367e-06, "loss": 0.6729, "step": 5732 }, { "epoch": 0.37, "grad_norm": 3.4538148048786157, "learning_rate": 7.306725176045464e-06, "loss": 0.7061, "step": 5733 }, { "epoch": 0.37, "grad_norm": 1.6444697998234257, "learning_rate": 7.30580547280536e-06, "loss": 0.7752, "step": 5734 }, { "epoch": 0.37, "grad_norm": 1.5556152168683683, "learning_rate": 7.304885670466581e-06, "loss": 0.704, "step": 5735 }, { "epoch": 0.37, "grad_norm": 1.4444280715210063, "learning_rate": 7.303965769068659e-06, "loss": 0.6426, "step": 5736 }, { "epoch": 0.37, "grad_norm": 1.5166843573614492, "learning_rate": 7.3030457686511305e-06, "loss": 0.6516, "step": 5737 }, { "epoch": 0.37, "grad_norm": 1.5251577335580757, "learning_rate": 7.302125669253533e-06, "loss": 0.6602, "step": 5738 }, { "epoch": 0.37, "grad_norm": 1.5554651500371943, "learning_rate": 7.3012054709154124e-06, "loss": 0.7114, "step": 5739 }, { "epoch": 0.37, "grad_norm": 1.5455742813292501, "learning_rate": 7.3002851736763165e-06, "loss": 0.6819, "step": 5740 }, { "epoch": 0.37, "grad_norm": 1.3382557183636046, "learning_rate": 7.299364777575797e-06, "loss": 0.6582, "step": 5741 }, { "epoch": 0.37, "grad_norm": 1.6154022635090381, "learning_rate": 7.298444282653412e-06, "loss": 0.6237, "step": 5742 }, { "epoch": 0.37, "grad_norm": 1.661844854095335, "learning_rate": 7.29752368894872e-06, "loss": 0.6738, "step": 5743 }, { "epoch": 0.37, "grad_norm": 1.5359557507542272, "learning_rate": 7.296602996501288e-06, "loss": 0.6194, "step": 5744 }, { "epoch": 0.37, "grad_norm": 1.387846610799219, "learning_rate": 7.295682205350685e-06, "loss": 0.6363, "step": 5745 }, { "epoch": 0.37, "grad_norm": 1.5238306285580636, "learning_rate": 7.294761315536485e-06, "loss": 0.7396, "step": 5746 }, { "epoch": 0.37, "grad_norm": 1.5592530516047893, "learning_rate": 7.293840327098265e-06, "loss": 0.7507, "step": 5747 }, { "epoch": 0.37, "grad_norm": 1.8208025472249703, "learning_rate": 7.292919240075609e-06, "loss": 0.6473, "step": 5748 }, { "epoch": 0.37, "grad_norm": 1.6896157766775723, "learning_rate": 7.291998054508102e-06, "loss": 0.7788, "step": 5749 }, { "epoch": 0.37, "grad_norm": 1.6137321109677698, "learning_rate": 7.291076770435333e-06, "loss": 0.6582, "step": 5750 }, { "epoch": 0.37, "grad_norm": 1.6075120818976258, "learning_rate": 7.2901553878969e-06, "loss": 0.6527, "step": 5751 }, { "epoch": 0.37, "grad_norm": 1.5154652809328641, "learning_rate": 7.2892339069324e-06, "loss": 0.701, "step": 5752 }, { "epoch": 0.37, "grad_norm": 1.5994477472755118, "learning_rate": 7.288312327581439e-06, "loss": 0.5662, "step": 5753 }, { "epoch": 0.37, "grad_norm": 1.8952118073999733, "learning_rate": 7.287390649883621e-06, "loss": 0.6535, "step": 5754 }, { "epoch": 0.37, "grad_norm": 1.8657038231620622, "learning_rate": 7.286468873878559e-06, "loss": 0.6997, "step": 5755 }, { "epoch": 0.37, "grad_norm": 1.6176006750580123, "learning_rate": 7.285546999605871e-06, "loss": 0.6328, "step": 5756 }, { "epoch": 0.37, "grad_norm": 1.8062296029355795, "learning_rate": 7.2846250271051735e-06, "loss": 0.7676, "step": 5757 }, { "epoch": 0.37, "grad_norm": 1.5464004387598511, "learning_rate": 7.283702956416092e-06, "loss": 0.7038, "step": 5758 }, { "epoch": 0.37, "grad_norm": 1.6542109121053157, "learning_rate": 7.282780787578258e-06, "loss": 0.7199, "step": 5759 }, { "epoch": 0.37, "grad_norm": 1.650263830498885, "learning_rate": 7.281858520631304e-06, "loss": 0.8187, "step": 5760 }, { "epoch": 0.37, "grad_norm": 1.4382316896713907, "learning_rate": 7.280936155614864e-06, "loss": 0.6398, "step": 5761 }, { "epoch": 0.37, "grad_norm": 1.6230989666562643, "learning_rate": 7.280013692568582e-06, "loss": 0.7052, "step": 5762 }, { "epoch": 0.37, "grad_norm": 1.3033991278820845, "learning_rate": 7.2790911315321015e-06, "loss": 0.6596, "step": 5763 }, { "epoch": 0.37, "grad_norm": 1.3594453563579143, "learning_rate": 7.278168472545072e-06, "loss": 0.6131, "step": 5764 }, { "epoch": 0.37, "grad_norm": 2.133932509443565, "learning_rate": 7.2772457156471496e-06, "loss": 0.7598, "step": 5765 }, { "epoch": 0.37, "grad_norm": 1.6315245013383364, "learning_rate": 7.276322860877992e-06, "loss": 0.6402, "step": 5766 }, { "epoch": 0.37, "grad_norm": 1.4743831144475434, "learning_rate": 7.275399908277261e-06, "loss": 0.7408, "step": 5767 }, { "epoch": 0.37, "grad_norm": 1.5318533206619982, "learning_rate": 7.274476857884622e-06, "loss": 0.6547, "step": 5768 }, { "epoch": 0.37, "grad_norm": 1.5041809108673483, "learning_rate": 7.273553709739749e-06, "loss": 0.577, "step": 5769 }, { "epoch": 0.37, "grad_norm": 1.4211189404948086, "learning_rate": 7.272630463882314e-06, "loss": 0.6538, "step": 5770 }, { "epoch": 0.37, "grad_norm": 1.6111249788977544, "learning_rate": 7.271707120351997e-06, "loss": 0.7084, "step": 5771 }, { "epoch": 0.37, "grad_norm": 1.7128774379016902, "learning_rate": 7.2707836791884815e-06, "loss": 0.6529, "step": 5772 }, { "epoch": 0.37, "grad_norm": 1.3925437065555695, "learning_rate": 7.269860140431455e-06, "loss": 0.7306, "step": 5773 }, { "epoch": 0.37, "grad_norm": 1.5732322685116626, "learning_rate": 7.268936504120609e-06, "loss": 0.6251, "step": 5774 }, { "epoch": 0.37, "grad_norm": 1.5658048667511877, "learning_rate": 7.268012770295641e-06, "loss": 0.6886, "step": 5775 }, { "epoch": 0.37, "grad_norm": 1.3622847750621472, "learning_rate": 7.2670889389962486e-06, "loss": 0.6305, "step": 5776 }, { "epoch": 0.37, "grad_norm": 1.6271286437355585, "learning_rate": 7.266165010262138e-06, "loss": 0.7501, "step": 5777 }, { "epoch": 0.37, "grad_norm": 1.836557848232329, "learning_rate": 7.265240984133017e-06, "loss": 0.6197, "step": 5778 }, { "epoch": 0.37, "grad_norm": 1.6049319822074766, "learning_rate": 7.264316860648598e-06, "loss": 0.6845, "step": 5779 }, { "epoch": 0.37, "grad_norm": 3.7232350074689395, "learning_rate": 7.263392639848599e-06, "loss": 0.6597, "step": 5780 }, { "epoch": 0.37, "grad_norm": 1.53004086815909, "learning_rate": 7.26246832177274e-06, "loss": 0.7142, "step": 5781 }, { "epoch": 0.37, "grad_norm": 1.2356036107058723, "learning_rate": 7.2615439064607475e-06, "loss": 0.6882, "step": 5782 }, { "epoch": 0.37, "grad_norm": 1.5542590845971107, "learning_rate": 7.2606193939523496e-06, "loss": 0.7201, "step": 5783 }, { "epoch": 0.37, "grad_norm": 1.5920792186942438, "learning_rate": 7.25969478428728e-06, "loss": 0.6981, "step": 5784 }, { "epoch": 0.37, "grad_norm": 1.5245557565756858, "learning_rate": 7.258770077505276e-06, "loss": 0.6583, "step": 5785 }, { "epoch": 0.37, "grad_norm": 1.1518257064449993, "learning_rate": 7.257845273646082e-06, "loss": 0.7115, "step": 5786 }, { "epoch": 0.37, "grad_norm": 1.048865068317754, "learning_rate": 7.256920372749441e-06, "loss": 0.6284, "step": 5787 }, { "epoch": 0.37, "grad_norm": 1.124422793517161, "learning_rate": 7.255995374855106e-06, "loss": 0.6185, "step": 5788 }, { "epoch": 0.37, "grad_norm": 1.4179014629184274, "learning_rate": 7.255070280002829e-06, "loss": 0.7406, "step": 5789 }, { "epoch": 0.37, "grad_norm": 1.5298446179790146, "learning_rate": 7.2541450882323714e-06, "loss": 0.7963, "step": 5790 }, { "epoch": 0.37, "grad_norm": 1.5827190883328401, "learning_rate": 7.253219799583495e-06, "loss": 0.6784, "step": 5791 }, { "epoch": 0.37, "grad_norm": 1.9411209307491424, "learning_rate": 7.252294414095965e-06, "loss": 0.7681, "step": 5792 }, { "epoch": 0.37, "grad_norm": 1.5684050798998472, "learning_rate": 7.251368931809554e-06, "loss": 0.6442, "step": 5793 }, { "epoch": 0.37, "grad_norm": 1.5380125457601899, "learning_rate": 7.250443352764036e-06, "loss": 0.6637, "step": 5794 }, { "epoch": 0.37, "grad_norm": 1.5241957463391125, "learning_rate": 7.249517676999192e-06, "loss": 0.6853, "step": 5795 }, { "epoch": 0.37, "grad_norm": 1.5164423985978615, "learning_rate": 7.248591904554807e-06, "loss": 0.6799, "step": 5796 }, { "epoch": 0.37, "grad_norm": 1.4738032290953056, "learning_rate": 7.247666035470666e-06, "loss": 0.6587, "step": 5797 }, { "epoch": 0.37, "grad_norm": 1.4607196430737595, "learning_rate": 7.2467400697865616e-06, "loss": 0.6569, "step": 5798 }, { "epoch": 0.37, "grad_norm": 1.526322343313307, "learning_rate": 7.24581400754229e-06, "loss": 0.6236, "step": 5799 }, { "epoch": 0.37, "grad_norm": 1.5036935315148863, "learning_rate": 7.244887848777651e-06, "loss": 0.6776, "step": 5800 }, { "epoch": 0.37, "grad_norm": 1.476759908476667, "learning_rate": 7.24396159353245e-06, "loss": 0.785, "step": 5801 }, { "epoch": 0.37, "grad_norm": 1.4841502574878818, "learning_rate": 7.2430352418464944e-06, "loss": 0.683, "step": 5802 }, { "epoch": 0.37, "grad_norm": 1.5059129855094198, "learning_rate": 7.242108793759597e-06, "loss": 0.6572, "step": 5803 }, { "epoch": 0.37, "grad_norm": 1.9356553079781076, "learning_rate": 7.2411822493115765e-06, "loss": 0.8056, "step": 5804 }, { "epoch": 0.37, "grad_norm": 1.5901299437831782, "learning_rate": 7.240255608542252e-06, "loss": 0.7132, "step": 5805 }, { "epoch": 0.37, "grad_norm": 1.4818576785608029, "learning_rate": 7.239328871491449e-06, "loss": 0.6818, "step": 5806 }, { "epoch": 0.37, "grad_norm": 1.4380950141554614, "learning_rate": 7.238402038198995e-06, "loss": 0.7378, "step": 5807 }, { "epoch": 0.37, "grad_norm": 1.2884587421506444, "learning_rate": 7.237475108704726e-06, "loss": 0.6465, "step": 5808 }, { "epoch": 0.37, "grad_norm": 1.5973290963591227, "learning_rate": 7.236548083048478e-06, "loss": 0.6801, "step": 5809 }, { "epoch": 0.37, "grad_norm": 1.6799032439194699, "learning_rate": 7.235620961270093e-06, "loss": 0.6576, "step": 5810 }, { "epoch": 0.37, "grad_norm": 1.548808519437039, "learning_rate": 7.234693743409418e-06, "loss": 0.7391, "step": 5811 }, { "epoch": 0.37, "grad_norm": 1.5270549367361037, "learning_rate": 7.233766429506299e-06, "loss": 0.6617, "step": 5812 }, { "epoch": 0.37, "grad_norm": 1.6208728593737602, "learning_rate": 7.232839019600595e-06, "loss": 0.7268, "step": 5813 }, { "epoch": 0.37, "grad_norm": 1.655865731486078, "learning_rate": 7.231911513732162e-06, "loss": 0.7522, "step": 5814 }, { "epoch": 0.37, "grad_norm": 1.7455310655607283, "learning_rate": 7.230983911940861e-06, "loss": 0.7068, "step": 5815 }, { "epoch": 0.37, "grad_norm": 1.5688523126606755, "learning_rate": 7.230056214266559e-06, "loss": 0.6695, "step": 5816 }, { "epoch": 0.37, "grad_norm": 1.579153335736605, "learning_rate": 7.229128420749127e-06, "loss": 0.7723, "step": 5817 }, { "epoch": 0.37, "grad_norm": 1.5767594636240516, "learning_rate": 7.228200531428441e-06, "loss": 0.634, "step": 5818 }, { "epoch": 0.37, "grad_norm": 1.8356656059077245, "learning_rate": 7.227272546344377e-06, "loss": 0.6793, "step": 5819 }, { "epoch": 0.37, "grad_norm": 1.093202105303126, "learning_rate": 7.226344465536821e-06, "loss": 0.6709, "step": 5820 }, { "epoch": 0.37, "grad_norm": 1.4829800045963122, "learning_rate": 7.225416289045655e-06, "loss": 0.7396, "step": 5821 }, { "epoch": 0.37, "grad_norm": 1.6685589239381196, "learning_rate": 7.2244880169107745e-06, "loss": 0.6854, "step": 5822 }, { "epoch": 0.37, "grad_norm": 1.5599989265523644, "learning_rate": 7.2235596491720724e-06, "loss": 0.7146, "step": 5823 }, { "epoch": 0.37, "grad_norm": 1.8336427414762082, "learning_rate": 7.2226311858694506e-06, "loss": 0.7768, "step": 5824 }, { "epoch": 0.37, "grad_norm": 1.657830572844024, "learning_rate": 7.22170262704281e-06, "loss": 0.6988, "step": 5825 }, { "epoch": 0.37, "grad_norm": 1.4585485571509833, "learning_rate": 7.2207739727320605e-06, "loss": 0.6723, "step": 5826 }, { "epoch": 0.37, "grad_norm": 1.4000519813456929, "learning_rate": 7.21984522297711e-06, "loss": 0.6664, "step": 5827 }, { "epoch": 0.37, "grad_norm": 1.4920725649703095, "learning_rate": 7.218916377817877e-06, "loss": 0.7137, "step": 5828 }, { "epoch": 0.37, "grad_norm": 1.559689965544991, "learning_rate": 7.217987437294281e-06, "loss": 0.6994, "step": 5829 }, { "epoch": 0.37, "grad_norm": 1.4116577543192859, "learning_rate": 7.217058401446245e-06, "loss": 0.6261, "step": 5830 }, { "epoch": 0.37, "grad_norm": 1.4655885505058088, "learning_rate": 7.216129270313698e-06, "loss": 0.6992, "step": 5831 }, { "epoch": 0.37, "grad_norm": 1.5112967870311291, "learning_rate": 7.215200043936571e-06, "loss": 0.7013, "step": 5832 }, { "epoch": 0.37, "grad_norm": 1.4925262671715411, "learning_rate": 7.214270722354802e-06, "loss": 0.7409, "step": 5833 }, { "epoch": 0.37, "grad_norm": 1.403960881826181, "learning_rate": 7.21334130560833e-06, "loss": 0.6693, "step": 5834 }, { "epoch": 0.37, "grad_norm": 1.533249117689447, "learning_rate": 7.2124117937371e-06, "loss": 0.668, "step": 5835 }, { "epoch": 0.37, "grad_norm": 1.736549376756925, "learning_rate": 7.211482186781058e-06, "loss": 0.755, "step": 5836 }, { "epoch": 0.37, "grad_norm": 2.2612684687786957, "learning_rate": 7.21055248478016e-06, "loss": 0.6264, "step": 5837 }, { "epoch": 0.37, "grad_norm": 1.7499630267286408, "learning_rate": 7.20962268777436e-06, "loss": 0.7302, "step": 5838 }, { "epoch": 0.37, "grad_norm": 1.4546773161405373, "learning_rate": 7.208692795803622e-06, "loss": 0.6892, "step": 5839 }, { "epoch": 0.37, "grad_norm": 1.6372151134874167, "learning_rate": 7.207762808907908e-06, "loss": 0.6481, "step": 5840 }, { "epoch": 0.37, "grad_norm": 1.9175594148167847, "learning_rate": 7.206832727127186e-06, "loss": 0.7491, "step": 5841 }, { "epoch": 0.37, "grad_norm": 1.5177531471622694, "learning_rate": 7.205902550501433e-06, "loss": 0.6638, "step": 5842 }, { "epoch": 0.37, "grad_norm": 1.5895358182995392, "learning_rate": 7.204972279070623e-06, "loss": 0.7657, "step": 5843 }, { "epoch": 0.37, "grad_norm": 1.7684119946205104, "learning_rate": 7.204041912874736e-06, "loss": 0.7142, "step": 5844 }, { "epoch": 0.37, "grad_norm": 1.6307432227127638, "learning_rate": 7.203111451953761e-06, "loss": 0.6341, "step": 5845 }, { "epoch": 0.37, "grad_norm": 1.4145477141800116, "learning_rate": 7.202180896347684e-06, "loss": 0.6416, "step": 5846 }, { "epoch": 0.37, "grad_norm": 1.414275792060413, "learning_rate": 7.201250246096501e-06, "loss": 0.6585, "step": 5847 }, { "epoch": 0.37, "grad_norm": 1.5308582346645434, "learning_rate": 7.200319501240206e-06, "loss": 0.6856, "step": 5848 }, { "epoch": 0.37, "grad_norm": 1.4616544708143926, "learning_rate": 7.1993886618188025e-06, "loss": 0.7088, "step": 5849 }, { "epoch": 0.37, "grad_norm": 1.4172963884764458, "learning_rate": 7.198457727872297e-06, "loss": 0.6366, "step": 5850 }, { "epoch": 0.37, "grad_norm": 1.6377843615743528, "learning_rate": 7.1975266994406965e-06, "loss": 0.7402, "step": 5851 }, { "epoch": 0.37, "grad_norm": 1.6836035093879933, "learning_rate": 7.196595576564017e-06, "loss": 0.7051, "step": 5852 }, { "epoch": 0.37, "grad_norm": 1.5351869864266634, "learning_rate": 7.195664359282275e-06, "loss": 0.6657, "step": 5853 }, { "epoch": 0.37, "grad_norm": 1.3694356004441388, "learning_rate": 7.194733047635494e-06, "loss": 0.7382, "step": 5854 }, { "epoch": 0.37, "grad_norm": 1.5959930324562686, "learning_rate": 7.193801641663697e-06, "loss": 0.6311, "step": 5855 }, { "epoch": 0.37, "grad_norm": 1.8075943521657127, "learning_rate": 7.192870141406916e-06, "loss": 0.6754, "step": 5856 }, { "epoch": 0.37, "grad_norm": 1.5434653803811185, "learning_rate": 7.191938546905183e-06, "loss": 0.6566, "step": 5857 }, { "epoch": 0.37, "grad_norm": 1.3887790711471817, "learning_rate": 7.191006858198538e-06, "loss": 0.6324, "step": 5858 }, { "epoch": 0.38, "grad_norm": 1.531482658262977, "learning_rate": 7.190075075327021e-06, "loss": 0.6213, "step": 5859 }, { "epoch": 0.38, "grad_norm": 1.527197414313633, "learning_rate": 7.1891431983306805e-06, "loss": 0.76, "step": 5860 }, { "epoch": 0.38, "grad_norm": 1.5034082010937846, "learning_rate": 7.188211227249565e-06, "loss": 0.667, "step": 5861 }, { "epoch": 0.38, "grad_norm": 1.5242886120145738, "learning_rate": 7.1872791621237305e-06, "loss": 0.6956, "step": 5862 }, { "epoch": 0.38, "grad_norm": 1.5580131979560867, "learning_rate": 7.186347002993233e-06, "loss": 0.6782, "step": 5863 }, { "epoch": 0.38, "grad_norm": 1.7106768431884738, "learning_rate": 7.185414749898134e-06, "loss": 0.709, "step": 5864 }, { "epoch": 0.38, "grad_norm": 1.8829870291761712, "learning_rate": 7.184482402878501e-06, "loss": 0.7173, "step": 5865 }, { "epoch": 0.38, "grad_norm": 1.6565401685697962, "learning_rate": 7.183549961974406e-06, "loss": 0.7697, "step": 5866 }, { "epoch": 0.38, "grad_norm": 1.796193814402306, "learning_rate": 7.182617427225922e-06, "loss": 0.718, "step": 5867 }, { "epoch": 0.38, "grad_norm": 6.897400402888817, "learning_rate": 7.1816847986731256e-06, "loss": 0.7343, "step": 5868 }, { "epoch": 0.38, "grad_norm": 1.6335056962091377, "learning_rate": 7.180752076356102e-06, "loss": 0.7101, "step": 5869 }, { "epoch": 0.38, "grad_norm": 1.4135697293484177, "learning_rate": 7.179819260314937e-06, "loss": 0.6546, "step": 5870 }, { "epoch": 0.38, "grad_norm": 1.0840400759934383, "learning_rate": 7.178886350589721e-06, "loss": 0.6772, "step": 5871 }, { "epoch": 0.38, "grad_norm": 1.3971890647500234, "learning_rate": 7.177953347220546e-06, "loss": 0.6874, "step": 5872 }, { "epoch": 0.38, "grad_norm": 1.2914374557517134, "learning_rate": 7.177020250247515e-06, "loss": 0.6847, "step": 5873 }, { "epoch": 0.38, "grad_norm": 1.4958733870226104, "learning_rate": 7.176087059710728e-06, "loss": 0.6837, "step": 5874 }, { "epoch": 0.38, "grad_norm": 1.7194741976568177, "learning_rate": 7.17515377565029e-06, "loss": 0.7121, "step": 5875 }, { "epoch": 0.38, "grad_norm": 1.6547426395886513, "learning_rate": 7.174220398106315e-06, "loss": 0.7841, "step": 5876 }, { "epoch": 0.38, "grad_norm": 1.6862546005875336, "learning_rate": 7.173286927118914e-06, "loss": 0.6334, "step": 5877 }, { "epoch": 0.38, "grad_norm": 1.5935433610114902, "learning_rate": 7.17235336272821e-06, "loss": 0.7808, "step": 5878 }, { "epoch": 0.38, "grad_norm": 1.6340050941145776, "learning_rate": 7.171419704974321e-06, "loss": 0.6443, "step": 5879 }, { "epoch": 0.38, "grad_norm": 1.1183809238036813, "learning_rate": 7.170485953897377e-06, "loss": 0.7456, "step": 5880 }, { "epoch": 0.38, "grad_norm": 1.5194849055831186, "learning_rate": 7.169552109537507e-06, "loss": 0.697, "step": 5881 }, { "epoch": 0.38, "grad_norm": 1.4307749283405566, "learning_rate": 7.168618171934848e-06, "loss": 0.6137, "step": 5882 }, { "epoch": 0.38, "grad_norm": 1.0019919161365363, "learning_rate": 7.167684141129536e-06, "loss": 0.6253, "step": 5883 }, { "epoch": 0.38, "grad_norm": 0.9787817326926374, "learning_rate": 7.166750017161715e-06, "loss": 0.6345, "step": 5884 }, { "epoch": 0.38, "grad_norm": 1.620898139734318, "learning_rate": 7.165815800071529e-06, "loss": 0.8439, "step": 5885 }, { "epoch": 0.38, "grad_norm": 1.1265120792944654, "learning_rate": 7.164881489899131e-06, "loss": 0.6211, "step": 5886 }, { "epoch": 0.38, "grad_norm": 1.8050771691357388, "learning_rate": 7.163947086684677e-06, "loss": 0.7287, "step": 5887 }, { "epoch": 0.38, "grad_norm": 1.5583490300594405, "learning_rate": 7.1630125904683245e-06, "loss": 0.7148, "step": 5888 }, { "epoch": 0.38, "grad_norm": 1.8127655578154653, "learning_rate": 7.1620780012902356e-06, "loss": 0.718, "step": 5889 }, { "epoch": 0.38, "grad_norm": 2.3236852670330403, "learning_rate": 7.161143319190577e-06, "loss": 0.7436, "step": 5890 }, { "epoch": 0.38, "grad_norm": 1.467626875353632, "learning_rate": 7.160208544209521e-06, "loss": 0.6384, "step": 5891 }, { "epoch": 0.38, "grad_norm": 1.732938392141533, "learning_rate": 7.159273676387241e-06, "loss": 0.7094, "step": 5892 }, { "epoch": 0.38, "grad_norm": 1.5860350832762464, "learning_rate": 7.158338715763912e-06, "loss": 0.8136, "step": 5893 }, { "epoch": 0.38, "grad_norm": 1.7110172718157395, "learning_rate": 7.157403662379725e-06, "loss": 0.7475, "step": 5894 }, { "epoch": 0.38, "grad_norm": 1.4244719685134435, "learning_rate": 7.156468516274859e-06, "loss": 0.6918, "step": 5895 }, { "epoch": 0.38, "grad_norm": 1.8107482139329758, "learning_rate": 7.155533277489508e-06, "loss": 0.6998, "step": 5896 }, { "epoch": 0.38, "grad_norm": 1.3939440492504183, "learning_rate": 7.154597946063867e-06, "loss": 0.7015, "step": 5897 }, { "epoch": 0.38, "grad_norm": 1.5398745791112285, "learning_rate": 7.153662522038134e-06, "loss": 0.7611, "step": 5898 }, { "epoch": 0.38, "grad_norm": 1.5575211129845794, "learning_rate": 7.152727005452511e-06, "loss": 0.7048, "step": 5899 }, { "epoch": 0.38, "grad_norm": 1.3719203921348622, "learning_rate": 7.151791396347203e-06, "loss": 0.6945, "step": 5900 }, { "epoch": 0.38, "grad_norm": 1.4359989385373695, "learning_rate": 7.1508556947624245e-06, "loss": 0.9027, "step": 5901 }, { "epoch": 0.38, "grad_norm": 1.5813237843060524, "learning_rate": 7.149919900738387e-06, "loss": 0.7253, "step": 5902 }, { "epoch": 0.38, "grad_norm": 1.4561779241391903, "learning_rate": 7.14898401431531e-06, "loss": 0.6145, "step": 5903 }, { "epoch": 0.38, "grad_norm": 1.8210028314458842, "learning_rate": 7.1480480355334155e-06, "loss": 0.7032, "step": 5904 }, { "epoch": 0.38, "grad_norm": 1.3946090434112044, "learning_rate": 7.14711196443293e-06, "loss": 0.5763, "step": 5905 }, { "epoch": 0.38, "grad_norm": 2.2566310075098537, "learning_rate": 7.146175801054084e-06, "loss": 0.6446, "step": 5906 }, { "epoch": 0.38, "grad_norm": 1.6825041868574593, "learning_rate": 7.145239545437113e-06, "loss": 0.7027, "step": 5907 }, { "epoch": 0.38, "grad_norm": 1.1639775169854825, "learning_rate": 7.144303197622251e-06, "loss": 0.726, "step": 5908 }, { "epoch": 0.38, "grad_norm": 1.510426864240262, "learning_rate": 7.143366757649746e-06, "loss": 0.719, "step": 5909 }, { "epoch": 0.38, "grad_norm": 1.0456750850508203, "learning_rate": 7.142430225559841e-06, "loss": 0.7502, "step": 5910 }, { "epoch": 0.38, "grad_norm": 1.4124233396267423, "learning_rate": 7.141493601392787e-06, "loss": 0.6409, "step": 5911 }, { "epoch": 0.38, "grad_norm": 1.9327188809285243, "learning_rate": 7.1405568851888384e-06, "loss": 0.742, "step": 5912 }, { "epoch": 0.38, "grad_norm": 0.9160914660662421, "learning_rate": 7.139620076988252e-06, "loss": 0.6013, "step": 5913 }, { "epoch": 0.38, "grad_norm": 1.6621819386874541, "learning_rate": 7.138683176831289e-06, "loss": 0.699, "step": 5914 }, { "epoch": 0.38, "grad_norm": 1.633753252094322, "learning_rate": 7.137746184758218e-06, "loss": 0.7102, "step": 5915 }, { "epoch": 0.38, "grad_norm": 1.45447411943849, "learning_rate": 7.13680910080931e-06, "loss": 0.6721, "step": 5916 }, { "epoch": 0.38, "grad_norm": 1.4278062893826624, "learning_rate": 7.135871925024835e-06, "loss": 0.7209, "step": 5917 }, { "epoch": 0.38, "grad_norm": 1.5625800401538303, "learning_rate": 7.134934657445074e-06, "loss": 0.7444, "step": 5918 }, { "epoch": 0.38, "grad_norm": 1.652685998431296, "learning_rate": 7.133997298110308e-06, "loss": 0.7112, "step": 5919 }, { "epoch": 0.38, "grad_norm": 1.5823286135127832, "learning_rate": 7.133059847060821e-06, "loss": 0.6672, "step": 5920 }, { "epoch": 0.38, "grad_norm": 1.3243080509474718, "learning_rate": 7.1321223043369034e-06, "loss": 0.6423, "step": 5921 }, { "epoch": 0.38, "grad_norm": 1.2135244388219504, "learning_rate": 7.13118466997885e-06, "loss": 0.6702, "step": 5922 }, { "epoch": 0.38, "grad_norm": 1.9800767752212252, "learning_rate": 7.130246944026958e-06, "loss": 0.6973, "step": 5923 }, { "epoch": 0.38, "grad_norm": 1.4410478790028205, "learning_rate": 7.129309126521528e-06, "loss": 0.6968, "step": 5924 }, { "epoch": 0.38, "grad_norm": 1.6336180514007452, "learning_rate": 7.128371217502868e-06, "loss": 0.7088, "step": 5925 }, { "epoch": 0.38, "grad_norm": 1.6248396386828206, "learning_rate": 7.127433217011283e-06, "loss": 0.6621, "step": 5926 }, { "epoch": 0.38, "grad_norm": 1.6371140467237009, "learning_rate": 7.12649512508709e-06, "loss": 0.7704, "step": 5927 }, { "epoch": 0.38, "grad_norm": 1.7044377301548779, "learning_rate": 7.125556941770604e-06, "loss": 0.7168, "step": 5928 }, { "epoch": 0.38, "grad_norm": 1.632456753297403, "learning_rate": 7.1246186671021475e-06, "loss": 0.7337, "step": 5929 }, { "epoch": 0.38, "grad_norm": 1.5910528812534528, "learning_rate": 7.123680301122044e-06, "loss": 0.7383, "step": 5930 }, { "epoch": 0.38, "grad_norm": 1.867718867655616, "learning_rate": 7.122741843870626e-06, "loss": 0.7954, "step": 5931 }, { "epoch": 0.38, "grad_norm": 1.5454073163491622, "learning_rate": 7.121803295388223e-06, "loss": 0.7495, "step": 5932 }, { "epoch": 0.38, "grad_norm": 1.4437675881889056, "learning_rate": 7.120864655715172e-06, "loss": 0.6809, "step": 5933 }, { "epoch": 0.38, "grad_norm": 1.5379961106181872, "learning_rate": 7.119925924891815e-06, "loss": 0.6411, "step": 5934 }, { "epoch": 0.38, "grad_norm": 1.5726387252113798, "learning_rate": 7.118987102958498e-06, "loss": 0.6414, "step": 5935 }, { "epoch": 0.38, "grad_norm": 1.5317053621849839, "learning_rate": 7.1180481899555655e-06, "loss": 0.7238, "step": 5936 }, { "epoch": 0.38, "grad_norm": 1.643777496777526, "learning_rate": 7.117109185923374e-06, "loss": 0.6494, "step": 5937 }, { "epoch": 0.38, "grad_norm": 1.4569496637079462, "learning_rate": 7.1161700909022776e-06, "loss": 0.6632, "step": 5938 }, { "epoch": 0.38, "grad_norm": 1.5602005065610687, "learning_rate": 7.115230904932639e-06, "loss": 0.7427, "step": 5939 }, { "epoch": 0.38, "grad_norm": 1.7191512820218104, "learning_rate": 7.1142916280548195e-06, "loss": 0.7222, "step": 5940 }, { "epoch": 0.38, "grad_norm": 1.1885445608927858, "learning_rate": 7.113352260309189e-06, "loss": 0.6613, "step": 5941 }, { "epoch": 0.38, "grad_norm": 1.5640637600840384, "learning_rate": 7.112412801736117e-06, "loss": 0.5697, "step": 5942 }, { "epoch": 0.38, "grad_norm": 1.6857152119815575, "learning_rate": 7.111473252375983e-06, "loss": 0.7487, "step": 5943 }, { "epoch": 0.38, "grad_norm": 1.5901346335023052, "learning_rate": 7.110533612269166e-06, "loss": 0.6924, "step": 5944 }, { "epoch": 0.38, "grad_norm": 1.5069734614696983, "learning_rate": 7.109593881456048e-06, "loss": 0.6846, "step": 5945 }, { "epoch": 0.38, "grad_norm": 1.5952093441730284, "learning_rate": 7.108654059977019e-06, "loss": 0.7163, "step": 5946 }, { "epoch": 0.38, "grad_norm": 1.4579003177487937, "learning_rate": 7.10771414787247e-06, "loss": 0.6951, "step": 5947 }, { "epoch": 0.38, "grad_norm": 1.4528660090449685, "learning_rate": 7.106774145182796e-06, "loss": 0.6993, "step": 5948 }, { "epoch": 0.38, "grad_norm": 1.3781101297506093, "learning_rate": 7.105834051948395e-06, "loss": 0.745, "step": 5949 }, { "epoch": 0.38, "grad_norm": 1.5641600169988497, "learning_rate": 7.10489386820967e-06, "loss": 0.7511, "step": 5950 }, { "epoch": 0.38, "grad_norm": 4.40091523454876, "learning_rate": 7.1039535940070305e-06, "loss": 0.6509, "step": 5951 }, { "epoch": 0.38, "grad_norm": 1.7785995245365729, "learning_rate": 7.103013229380887e-06, "loss": 0.6824, "step": 5952 }, { "epoch": 0.38, "grad_norm": 2.013972372964136, "learning_rate": 7.102072774371654e-06, "loss": 0.7627, "step": 5953 }, { "epoch": 0.38, "grad_norm": 1.4050283928130867, "learning_rate": 7.1011322290197515e-06, "loss": 0.6544, "step": 5954 }, { "epoch": 0.38, "grad_norm": 1.046333916317262, "learning_rate": 7.1001915933655994e-06, "loss": 0.6652, "step": 5955 }, { "epoch": 0.38, "grad_norm": 1.1235425928646539, "learning_rate": 7.099250867449626e-06, "loss": 0.6494, "step": 5956 }, { "epoch": 0.38, "grad_norm": 1.5986726386814951, "learning_rate": 7.098310051312261e-06, "loss": 0.6468, "step": 5957 }, { "epoch": 0.38, "grad_norm": 1.5445477621229884, "learning_rate": 7.09736914499394e-06, "loss": 0.7344, "step": 5958 }, { "epoch": 0.38, "grad_norm": 1.909692771712728, "learning_rate": 7.096428148535101e-06, "loss": 0.7011, "step": 5959 }, { "epoch": 0.38, "grad_norm": 1.53360104485821, "learning_rate": 7.095487061976183e-06, "loss": 0.6383, "step": 5960 }, { "epoch": 0.38, "grad_norm": 1.5519088329909692, "learning_rate": 7.094545885357636e-06, "loss": 0.7008, "step": 5961 }, { "epoch": 0.38, "grad_norm": 2.151638231029111, "learning_rate": 7.093604618719907e-06, "loss": 0.7099, "step": 5962 }, { "epoch": 0.38, "grad_norm": 1.4959383801621253, "learning_rate": 7.092663262103452e-06, "loss": 0.6724, "step": 5963 }, { "epoch": 0.38, "grad_norm": 1.086922904596248, "learning_rate": 7.091721815548727e-06, "loss": 0.6352, "step": 5964 }, { "epoch": 0.38, "grad_norm": 1.4310428543203961, "learning_rate": 7.0907802790961925e-06, "loss": 0.6842, "step": 5965 }, { "epoch": 0.38, "grad_norm": 1.7598107166291186, "learning_rate": 7.089838652786316e-06, "loss": 0.7214, "step": 5966 }, { "epoch": 0.38, "grad_norm": 1.0823122114894521, "learning_rate": 7.088896936659566e-06, "loss": 0.6562, "step": 5967 }, { "epoch": 0.38, "grad_norm": 1.4308256428504063, "learning_rate": 7.087955130756414e-06, "loss": 0.7108, "step": 5968 }, { "epoch": 0.38, "grad_norm": 1.7408267526364534, "learning_rate": 7.087013235117339e-06, "loss": 0.7568, "step": 5969 }, { "epoch": 0.38, "grad_norm": 1.6093826907341202, "learning_rate": 7.08607124978282e-06, "loss": 0.7783, "step": 5970 }, { "epoch": 0.38, "grad_norm": 1.5146348956255775, "learning_rate": 7.0851291747933415e-06, "loss": 0.6519, "step": 5971 }, { "epoch": 0.38, "grad_norm": 1.7372115175556457, "learning_rate": 7.084187010189393e-06, "loss": 0.6324, "step": 5972 }, { "epoch": 0.38, "grad_norm": 1.3513479379163702, "learning_rate": 7.083244756011466e-06, "loss": 0.6629, "step": 5973 }, { "epoch": 0.38, "grad_norm": 1.5283648739727058, "learning_rate": 7.082302412300057e-06, "loss": 0.6087, "step": 5974 }, { "epoch": 0.38, "grad_norm": 1.5954397127083002, "learning_rate": 7.081359979095667e-06, "loss": 0.6812, "step": 5975 }, { "epoch": 0.38, "grad_norm": 1.5880137406536112, "learning_rate": 7.080417456438798e-06, "loss": 0.7162, "step": 5976 }, { "epoch": 0.38, "grad_norm": 1.8250413566636141, "learning_rate": 7.079474844369958e-06, "loss": 0.6148, "step": 5977 }, { "epoch": 0.38, "grad_norm": 1.267096779246928, "learning_rate": 7.0785321429296585e-06, "loss": 0.6419, "step": 5978 }, { "epoch": 0.38, "grad_norm": 1.9072431983175886, "learning_rate": 7.077589352158415e-06, "loss": 0.7272, "step": 5979 }, { "epoch": 0.38, "grad_norm": 1.4019658561815147, "learning_rate": 7.0766464720967466e-06, "loss": 0.6802, "step": 5980 }, { "epoch": 0.38, "grad_norm": 1.74817193729955, "learning_rate": 7.075703502785178e-06, "loss": 0.6987, "step": 5981 }, { "epoch": 0.38, "grad_norm": 1.69621232465275, "learning_rate": 7.0747604442642324e-06, "loss": 0.7416, "step": 5982 }, { "epoch": 0.38, "grad_norm": 3.935290650283568, "learning_rate": 7.073817296574444e-06, "loss": 0.6025, "step": 5983 }, { "epoch": 0.38, "grad_norm": 1.4839827459500996, "learning_rate": 7.072874059756346e-06, "loss": 0.743, "step": 5984 }, { "epoch": 0.38, "grad_norm": 1.361681359615694, "learning_rate": 7.071930733850476e-06, "loss": 0.6847, "step": 5985 }, { "epoch": 0.38, "grad_norm": 1.0506875488296867, "learning_rate": 7.070987318897377e-06, "loss": 0.7003, "step": 5986 }, { "epoch": 0.38, "grad_norm": 1.4199625852972297, "learning_rate": 7.070043814937595e-06, "loss": 0.758, "step": 5987 }, { "epoch": 0.38, "grad_norm": 1.3786889192792446, "learning_rate": 7.069100222011678e-06, "loss": 0.72, "step": 5988 }, { "epoch": 0.38, "grad_norm": 1.6415756426064259, "learning_rate": 7.068156540160182e-06, "loss": 0.6754, "step": 5989 }, { "epoch": 0.38, "grad_norm": 1.4194482297615392, "learning_rate": 7.0672127694236655e-06, "loss": 0.6667, "step": 5990 }, { "epoch": 0.38, "grad_norm": 1.6100160242197414, "learning_rate": 7.066268909842687e-06, "loss": 0.8309, "step": 5991 }, { "epoch": 0.38, "grad_norm": 1.7080056268283574, "learning_rate": 7.065324961457812e-06, "loss": 0.7958, "step": 5992 }, { "epoch": 0.38, "grad_norm": 1.427941266505856, "learning_rate": 7.06438092430961e-06, "loss": 0.739, "step": 5993 }, { "epoch": 0.38, "grad_norm": 1.4289587972084632, "learning_rate": 7.0634367984386545e-06, "loss": 0.7268, "step": 5994 }, { "epoch": 0.38, "grad_norm": 1.8040535298099059, "learning_rate": 7.062492583885521e-06, "loss": 0.6292, "step": 5995 }, { "epoch": 0.38, "grad_norm": 1.6844684705044932, "learning_rate": 7.061548280690791e-06, "loss": 0.7377, "step": 5996 }, { "epoch": 0.38, "grad_norm": 1.5640845370639427, "learning_rate": 7.060603888895046e-06, "loss": 0.7594, "step": 5997 }, { "epoch": 0.38, "grad_norm": 1.2257645394467018, "learning_rate": 7.059659408538876e-06, "loss": 0.5958, "step": 5998 }, { "epoch": 0.38, "grad_norm": 1.9836391331253482, "learning_rate": 7.058714839662874e-06, "loss": 0.6975, "step": 5999 }, { "epoch": 0.38, "grad_norm": 1.5675203460785418, "learning_rate": 7.057770182307633e-06, "loss": 0.5769, "step": 6000 }, { "epoch": 0.38, "grad_norm": 1.6621496881985933, "learning_rate": 7.056825436513754e-06, "loss": 0.7593, "step": 6001 }, { "epoch": 0.38, "grad_norm": 1.5625398857501698, "learning_rate": 7.055880602321839e-06, "loss": 0.6853, "step": 6002 }, { "epoch": 0.38, "grad_norm": 1.5309607340573952, "learning_rate": 7.054935679772497e-06, "loss": 0.6736, "step": 6003 }, { "epoch": 0.38, "grad_norm": 1.4874130946761523, "learning_rate": 7.0539906689063364e-06, "loss": 0.735, "step": 6004 }, { "epoch": 0.38, "grad_norm": 1.1078176526949828, "learning_rate": 7.053045569763973e-06, "loss": 0.6113, "step": 6005 }, { "epoch": 0.38, "grad_norm": 1.8153993603738552, "learning_rate": 7.052100382386026e-06, "loss": 0.6913, "step": 6006 }, { "epoch": 0.38, "grad_norm": 1.4245131657421035, "learning_rate": 7.051155106813114e-06, "loss": 0.7538, "step": 6007 }, { "epoch": 0.38, "grad_norm": 1.6911920750251992, "learning_rate": 7.050209743085867e-06, "loss": 0.6444, "step": 6008 }, { "epoch": 0.38, "grad_norm": 1.2218758568824102, "learning_rate": 7.049264291244915e-06, "loss": 0.8085, "step": 6009 }, { "epoch": 0.38, "grad_norm": 1.511415643086729, "learning_rate": 7.048318751330889e-06, "loss": 0.6483, "step": 6010 }, { "epoch": 0.38, "grad_norm": 1.5151845295184732, "learning_rate": 7.047373123384426e-06, "loss": 0.6187, "step": 6011 }, { "epoch": 0.38, "grad_norm": 2.462105919289432, "learning_rate": 7.04642740744617e-06, "loss": 0.7116, "step": 6012 }, { "epoch": 0.38, "grad_norm": 1.1522916069779474, "learning_rate": 7.045481603556763e-06, "loss": 0.6701, "step": 6013 }, { "epoch": 0.38, "grad_norm": 1.4987632766529686, "learning_rate": 7.044535711756855e-06, "loss": 0.761, "step": 6014 }, { "epoch": 0.38, "grad_norm": 1.8374708373975546, "learning_rate": 7.043589732087098e-06, "loss": 0.743, "step": 6015 }, { "epoch": 0.39, "grad_norm": 1.3850641919465256, "learning_rate": 7.042643664588149e-06, "loss": 0.6494, "step": 6016 }, { "epoch": 0.39, "grad_norm": 1.5555088591865256, "learning_rate": 7.041697509300667e-06, "loss": 0.7053, "step": 6017 }, { "epoch": 0.39, "grad_norm": 1.5650706968473993, "learning_rate": 7.0407512662653174e-06, "loss": 0.6523, "step": 6018 }, { "epoch": 0.39, "grad_norm": 1.3492077214845428, "learning_rate": 7.039804935522766e-06, "loss": 0.6283, "step": 6019 }, { "epoch": 0.39, "grad_norm": 1.3017947548527518, "learning_rate": 7.038858517113684e-06, "loss": 0.6713, "step": 6020 }, { "epoch": 0.39, "grad_norm": 1.49914269694362, "learning_rate": 7.037912011078749e-06, "loss": 0.7807, "step": 6021 }, { "epoch": 0.39, "grad_norm": 1.5097608610240665, "learning_rate": 7.036965417458635e-06, "loss": 0.7036, "step": 6022 }, { "epoch": 0.39, "grad_norm": 1.5004754782805656, "learning_rate": 7.03601873629403e-06, "loss": 0.7632, "step": 6023 }, { "epoch": 0.39, "grad_norm": 1.508397433091249, "learning_rate": 7.035071967625617e-06, "loss": 0.6764, "step": 6024 }, { "epoch": 0.39, "grad_norm": 1.592822151656654, "learning_rate": 7.0341251114940864e-06, "loss": 0.7399, "step": 6025 }, { "epoch": 0.39, "grad_norm": 1.405315771332696, "learning_rate": 7.0331781679401345e-06, "loss": 0.6757, "step": 6026 }, { "epoch": 0.39, "grad_norm": 1.7861279660087075, "learning_rate": 7.032231137004457e-06, "loss": 0.7339, "step": 6027 }, { "epoch": 0.39, "grad_norm": 1.5538425319595748, "learning_rate": 7.031284018727756e-06, "loss": 0.6688, "step": 6028 }, { "epoch": 0.39, "grad_norm": 1.400307329839631, "learning_rate": 7.030336813150734e-06, "loss": 0.6537, "step": 6029 }, { "epoch": 0.39, "grad_norm": 1.6273265524004203, "learning_rate": 7.029389520314103e-06, "loss": 0.6788, "step": 6030 }, { "epoch": 0.39, "grad_norm": 1.549634387110354, "learning_rate": 7.028442140258576e-06, "loss": 0.736, "step": 6031 }, { "epoch": 0.39, "grad_norm": 1.8914233398734261, "learning_rate": 7.027494673024867e-06, "loss": 0.7454, "step": 6032 }, { "epoch": 0.39, "grad_norm": 1.6159559734787778, "learning_rate": 7.026547118653697e-06, "loss": 0.7016, "step": 6033 }, { "epoch": 0.39, "grad_norm": 1.680509664646851, "learning_rate": 7.0255994771857906e-06, "loss": 0.7506, "step": 6034 }, { "epoch": 0.39, "grad_norm": 1.6121205654629363, "learning_rate": 7.024651748661875e-06, "loss": 0.6145, "step": 6035 }, { "epoch": 0.39, "grad_norm": 1.3784630778990663, "learning_rate": 7.023703933122683e-06, "loss": 0.6338, "step": 6036 }, { "epoch": 0.39, "grad_norm": 1.628584737631853, "learning_rate": 7.022756030608946e-06, "loss": 0.646, "step": 6037 }, { "epoch": 0.39, "grad_norm": 1.4767680125090492, "learning_rate": 7.0218080411614065e-06, "loss": 0.71, "step": 6038 }, { "epoch": 0.39, "grad_norm": 1.6682887823562347, "learning_rate": 7.0208599648208054e-06, "loss": 0.702, "step": 6039 }, { "epoch": 0.39, "grad_norm": 1.4815329669721908, "learning_rate": 7.01991180162789e-06, "loss": 0.6554, "step": 6040 }, { "epoch": 0.39, "grad_norm": 1.4758053047995487, "learning_rate": 7.01896355162341e-06, "loss": 0.7345, "step": 6041 }, { "epoch": 0.39, "grad_norm": 1.558886041415082, "learning_rate": 7.018015214848119e-06, "loss": 0.7644, "step": 6042 }, { "epoch": 0.39, "grad_norm": 1.574518977386488, "learning_rate": 7.017066791342773e-06, "loss": 0.6984, "step": 6043 }, { "epoch": 0.39, "grad_norm": 1.3913794229905179, "learning_rate": 7.016118281148134e-06, "loss": 0.6753, "step": 6044 }, { "epoch": 0.39, "grad_norm": 1.6664533841134495, "learning_rate": 7.01516968430497e-06, "loss": 0.6116, "step": 6045 }, { "epoch": 0.39, "grad_norm": 1.1560078763881583, "learning_rate": 7.014221000854047e-06, "loss": 0.6015, "step": 6046 }, { "epoch": 0.39, "grad_norm": 1.5981237063866938, "learning_rate": 7.013272230836139e-06, "loss": 0.6784, "step": 6047 }, { "epoch": 0.39, "grad_norm": 1.4492325027960524, "learning_rate": 7.01232337429202e-06, "loss": 0.7213, "step": 6048 }, { "epoch": 0.39, "grad_norm": 1.4845264531115518, "learning_rate": 7.01137443126247e-06, "loss": 0.7345, "step": 6049 }, { "epoch": 0.39, "grad_norm": 1.6890825670354883, "learning_rate": 7.010425401788273e-06, "loss": 0.6866, "step": 6050 }, { "epoch": 0.39, "grad_norm": 1.1545669911042529, "learning_rate": 7.009476285910218e-06, "loss": 0.7479, "step": 6051 }, { "epoch": 0.39, "grad_norm": 1.5361766175899823, "learning_rate": 7.008527083669094e-06, "loss": 0.6872, "step": 6052 }, { "epoch": 0.39, "grad_norm": 1.385326562763441, "learning_rate": 7.007577795105697e-06, "loss": 0.682, "step": 6053 }, { "epoch": 0.39, "grad_norm": 1.4918231494478345, "learning_rate": 7.0066284202608245e-06, "loss": 0.7787, "step": 6054 }, { "epoch": 0.39, "grad_norm": 1.702062306001649, "learning_rate": 7.005678959175279e-06, "loss": 0.6565, "step": 6055 }, { "epoch": 0.39, "grad_norm": 1.5099397604930784, "learning_rate": 7.0047294118898675e-06, "loss": 0.6878, "step": 6056 }, { "epoch": 0.39, "grad_norm": 1.5462898464535413, "learning_rate": 7.003779778445398e-06, "loss": 0.7314, "step": 6057 }, { "epoch": 0.39, "grad_norm": 1.6639877725602037, "learning_rate": 7.0028300588826825e-06, "loss": 0.7254, "step": 6058 }, { "epoch": 0.39, "grad_norm": 4.775361975683425, "learning_rate": 7.001880253242541e-06, "loss": 0.7202, "step": 6059 }, { "epoch": 0.39, "grad_norm": 1.8776556757768759, "learning_rate": 7.000930361565792e-06, "loss": 0.6914, "step": 6060 }, { "epoch": 0.39, "grad_norm": 1.786559158929665, "learning_rate": 6.999980383893261e-06, "loss": 0.7522, "step": 6061 }, { "epoch": 0.39, "grad_norm": 1.69808112183688, "learning_rate": 6.999030320265775e-06, "loss": 0.6952, "step": 6062 }, { "epoch": 0.39, "grad_norm": 1.2719624672088001, "learning_rate": 6.998080170724167e-06, "loss": 0.7312, "step": 6063 }, { "epoch": 0.39, "grad_norm": 1.4654010318169164, "learning_rate": 6.997129935309272e-06, "loss": 0.6649, "step": 6064 }, { "epoch": 0.39, "grad_norm": 1.4687828208590166, "learning_rate": 6.996179614061929e-06, "loss": 0.7274, "step": 6065 }, { "epoch": 0.39, "grad_norm": 1.10959413793876, "learning_rate": 6.99522920702298e-06, "loss": 0.6012, "step": 6066 }, { "epoch": 0.39, "grad_norm": 1.446870671660044, "learning_rate": 6.9942787142332735e-06, "loss": 0.8266, "step": 6067 }, { "epoch": 0.39, "grad_norm": 1.7084872452472455, "learning_rate": 6.993328135733658e-06, "loss": 0.7001, "step": 6068 }, { "epoch": 0.39, "grad_norm": 1.7169569279213084, "learning_rate": 6.992377471564987e-06, "loss": 0.6756, "step": 6069 }, { "epoch": 0.39, "grad_norm": 1.5938249344491333, "learning_rate": 6.9914267217681195e-06, "loss": 0.7315, "step": 6070 }, { "epoch": 0.39, "grad_norm": 1.437941101857116, "learning_rate": 6.990475886383915e-06, "loss": 0.7263, "step": 6071 }, { "epoch": 0.39, "grad_norm": 1.4752479812634902, "learning_rate": 6.98952496545324e-06, "loss": 0.6679, "step": 6072 }, { "epoch": 0.39, "grad_norm": 1.7203065721024764, "learning_rate": 6.988573959016963e-06, "loss": 0.6985, "step": 6073 }, { "epoch": 0.39, "grad_norm": 1.1623241233640331, "learning_rate": 6.987622867115956e-06, "loss": 0.7209, "step": 6074 }, { "epoch": 0.39, "grad_norm": 2.34490882200217, "learning_rate": 6.9866716897910945e-06, "loss": 0.692, "step": 6075 }, { "epoch": 0.39, "grad_norm": 1.433132279474264, "learning_rate": 6.985720427083258e-06, "loss": 0.6988, "step": 6076 }, { "epoch": 0.39, "grad_norm": 1.5879432080851759, "learning_rate": 6.984769079033331e-06, "loss": 0.7582, "step": 6077 }, { "epoch": 0.39, "grad_norm": 1.584136744687922, "learning_rate": 6.983817645682199e-06, "loss": 0.8151, "step": 6078 }, { "epoch": 0.39, "grad_norm": 1.5417234327367326, "learning_rate": 6.982866127070753e-06, "loss": 0.7453, "step": 6079 }, { "epoch": 0.39, "grad_norm": 1.5962813803060871, "learning_rate": 6.981914523239888e-06, "loss": 0.6916, "step": 6080 }, { "epoch": 0.39, "grad_norm": 2.83111147645004, "learning_rate": 6.9809628342305e-06, "loss": 0.7397, "step": 6081 }, { "epoch": 0.39, "grad_norm": 1.5147767805234873, "learning_rate": 6.980011060083493e-06, "loss": 0.6643, "step": 6082 }, { "epoch": 0.39, "grad_norm": 1.2133858895382172, "learning_rate": 6.9790592008397705e-06, "loss": 0.6787, "step": 6083 }, { "epoch": 0.39, "grad_norm": 1.7041639316202837, "learning_rate": 6.978107256540243e-06, "loss": 0.6985, "step": 6084 }, { "epoch": 0.39, "grad_norm": 1.5110143110755705, "learning_rate": 6.9771552272258226e-06, "loss": 0.7574, "step": 6085 }, { "epoch": 0.39, "grad_norm": 1.1259329176686848, "learning_rate": 6.976203112937423e-06, "loss": 0.6048, "step": 6086 }, { "epoch": 0.39, "grad_norm": 1.388040637305107, "learning_rate": 6.975250913715968e-06, "loss": 0.6568, "step": 6087 }, { "epoch": 0.39, "grad_norm": 1.7212422557192641, "learning_rate": 6.97429862960238e-06, "loss": 0.759, "step": 6088 }, { "epoch": 0.39, "grad_norm": 1.612013024518374, "learning_rate": 6.973346260637583e-06, "loss": 0.7656, "step": 6089 }, { "epoch": 0.39, "grad_norm": 1.3103819979191829, "learning_rate": 6.972393806862512e-06, "loss": 0.6847, "step": 6090 }, { "epoch": 0.39, "grad_norm": 1.483343310433907, "learning_rate": 6.9714412683181e-06, "loss": 0.6717, "step": 6091 }, { "epoch": 0.39, "grad_norm": 1.6308188823011118, "learning_rate": 6.970488645045284e-06, "loss": 0.7549, "step": 6092 }, { "epoch": 0.39, "grad_norm": 1.4102221996586504, "learning_rate": 6.969535937085006e-06, "loss": 0.7055, "step": 6093 }, { "epoch": 0.39, "grad_norm": 1.647586558152998, "learning_rate": 6.968583144478214e-06, "loss": 0.6784, "step": 6094 }, { "epoch": 0.39, "grad_norm": 1.5426395564915174, "learning_rate": 6.967630267265854e-06, "loss": 0.6503, "step": 6095 }, { "epoch": 0.39, "grad_norm": 1.5099036901179417, "learning_rate": 6.966677305488879e-06, "loss": 0.7463, "step": 6096 }, { "epoch": 0.39, "grad_norm": 2.3922326912327487, "learning_rate": 6.965724259188246e-06, "loss": 0.7069, "step": 6097 }, { "epoch": 0.39, "grad_norm": 1.2875984691530162, "learning_rate": 6.9647711284049166e-06, "loss": 0.6321, "step": 6098 }, { "epoch": 0.39, "grad_norm": 1.6490280979425196, "learning_rate": 6.96381791317985e-06, "loss": 0.7517, "step": 6099 }, { "epoch": 0.39, "grad_norm": 1.1321687400398046, "learning_rate": 6.962864613554018e-06, "loss": 0.6548, "step": 6100 }, { "epoch": 0.39, "grad_norm": 1.3765705927758358, "learning_rate": 6.961911229568388e-06, "loss": 0.6551, "step": 6101 }, { "epoch": 0.39, "grad_norm": 3.560755424087223, "learning_rate": 6.9609577612639375e-06, "loss": 0.6512, "step": 6102 }, { "epoch": 0.39, "grad_norm": 1.6734675213556593, "learning_rate": 6.9600042086816424e-06, "loss": 0.6912, "step": 6103 }, { "epoch": 0.39, "grad_norm": 1.8772503570166745, "learning_rate": 6.959050571862485e-06, "loss": 0.7516, "step": 6104 }, { "epoch": 0.39, "grad_norm": 1.7328674221175364, "learning_rate": 6.958096850847451e-06, "loss": 0.6377, "step": 6105 }, { "epoch": 0.39, "grad_norm": 1.5541643444379651, "learning_rate": 6.957143045677528e-06, "loss": 0.7299, "step": 6106 }, { "epoch": 0.39, "grad_norm": 1.71027356849763, "learning_rate": 6.956189156393709e-06, "loss": 0.6768, "step": 6107 }, { "epoch": 0.39, "grad_norm": 1.5726665049307949, "learning_rate": 6.95523518303699e-06, "loss": 0.7273, "step": 6108 }, { "epoch": 0.39, "grad_norm": 1.4990090391091995, "learning_rate": 6.954281125648373e-06, "loss": 0.6736, "step": 6109 }, { "epoch": 0.39, "grad_norm": 1.3379489759923855, "learning_rate": 6.95332698426886e-06, "loss": 0.6509, "step": 6110 }, { "epoch": 0.39, "grad_norm": 1.5210366790775502, "learning_rate": 6.952372758939457e-06, "loss": 0.7076, "step": 6111 }, { "epoch": 0.39, "grad_norm": 1.5638664763271763, "learning_rate": 6.951418449701176e-06, "loss": 0.7107, "step": 6112 }, { "epoch": 0.39, "grad_norm": 1.7238745432742044, "learning_rate": 6.9504640565950295e-06, "loss": 0.7574, "step": 6113 }, { "epoch": 0.39, "grad_norm": 1.6449775273319496, "learning_rate": 6.949509579662037e-06, "loss": 0.6619, "step": 6114 }, { "epoch": 0.39, "grad_norm": 1.73766036244402, "learning_rate": 6.948555018943219e-06, "loss": 0.7465, "step": 6115 }, { "epoch": 0.39, "grad_norm": 2.060441466368408, "learning_rate": 6.947600374479602e-06, "loss": 0.7168, "step": 6116 }, { "epoch": 0.39, "grad_norm": 2.199765376469911, "learning_rate": 6.946645646312212e-06, "loss": 0.762, "step": 6117 }, { "epoch": 0.39, "grad_norm": 1.6666916175055888, "learning_rate": 6.945690834482082e-06, "loss": 0.7504, "step": 6118 }, { "epoch": 0.39, "grad_norm": 2.1149059035661537, "learning_rate": 6.944735939030249e-06, "loss": 0.5738, "step": 6119 }, { "epoch": 0.39, "grad_norm": 1.1709593499147624, "learning_rate": 6.943780959997753e-06, "loss": 0.6922, "step": 6120 }, { "epoch": 0.39, "grad_norm": 1.626851020754018, "learning_rate": 6.942825897425633e-06, "loss": 0.696, "step": 6121 }, { "epoch": 0.39, "grad_norm": 0.9813460798421834, "learning_rate": 6.94187075135494e-06, "loss": 0.6498, "step": 6122 }, { "epoch": 0.39, "grad_norm": 1.531843819088271, "learning_rate": 6.940915521826723e-06, "loss": 0.6409, "step": 6123 }, { "epoch": 0.39, "grad_norm": 1.6188717735128204, "learning_rate": 6.939960208882035e-06, "loss": 0.7615, "step": 6124 }, { "epoch": 0.39, "grad_norm": 1.5408512032459158, "learning_rate": 6.939004812561934e-06, "loss": 0.6629, "step": 6125 }, { "epoch": 0.39, "grad_norm": 1.72567983120352, "learning_rate": 6.938049332907481e-06, "loss": 0.692, "step": 6126 }, { "epoch": 0.39, "grad_norm": 1.9243104955871881, "learning_rate": 6.937093769959737e-06, "loss": 0.6098, "step": 6127 }, { "epoch": 0.39, "grad_norm": 1.729975588947848, "learning_rate": 6.9361381237597766e-06, "loss": 0.6609, "step": 6128 }, { "epoch": 0.39, "grad_norm": 1.0947000560837739, "learning_rate": 6.9351823943486654e-06, "loss": 0.597, "step": 6129 }, { "epoch": 0.39, "grad_norm": 1.5310705765705892, "learning_rate": 6.934226581767484e-06, "loss": 0.7179, "step": 6130 }, { "epoch": 0.39, "grad_norm": 1.7608559720035455, "learning_rate": 6.933270686057308e-06, "loss": 0.6892, "step": 6131 }, { "epoch": 0.39, "grad_norm": 1.5772702933148053, "learning_rate": 6.932314707259218e-06, "loss": 0.7014, "step": 6132 }, { "epoch": 0.39, "grad_norm": 1.6699208416232216, "learning_rate": 6.931358645414304e-06, "loss": 0.6428, "step": 6133 }, { "epoch": 0.39, "grad_norm": 1.3193216234630945, "learning_rate": 6.930402500563653e-06, "loss": 0.7025, "step": 6134 }, { "epoch": 0.39, "grad_norm": 1.548473540312669, "learning_rate": 6.929446272748361e-06, "loss": 0.6536, "step": 6135 }, { "epoch": 0.39, "grad_norm": 1.5356873222259835, "learning_rate": 6.928489962009519e-06, "loss": 0.6673, "step": 6136 }, { "epoch": 0.39, "grad_norm": 1.5922828437467484, "learning_rate": 6.927533568388232e-06, "loss": 0.7327, "step": 6137 }, { "epoch": 0.39, "grad_norm": 1.2791927528097806, "learning_rate": 6.926577091925604e-06, "loss": 0.7765, "step": 6138 }, { "epoch": 0.39, "grad_norm": 1.241371957772041, "learning_rate": 6.92562053266274e-06, "loss": 0.6837, "step": 6139 }, { "epoch": 0.39, "grad_norm": 1.3931802602696068, "learning_rate": 6.924663890640752e-06, "loss": 0.7278, "step": 6140 }, { "epoch": 0.39, "grad_norm": 1.6134017512629955, "learning_rate": 6.923707165900753e-06, "loss": 0.6244, "step": 6141 }, { "epoch": 0.39, "grad_norm": 1.565199941101689, "learning_rate": 6.922750358483865e-06, "loss": 0.7668, "step": 6142 }, { "epoch": 0.39, "grad_norm": 1.539780177336896, "learning_rate": 6.921793468431204e-06, "loss": 0.5957, "step": 6143 }, { "epoch": 0.39, "grad_norm": 1.3968699827699473, "learning_rate": 6.920836495783899e-06, "loss": 0.6206, "step": 6144 }, { "epoch": 0.39, "grad_norm": 1.8327585831016933, "learning_rate": 6.9198794405830776e-06, "loss": 0.6849, "step": 6145 }, { "epoch": 0.39, "grad_norm": 1.5082066035936206, "learning_rate": 6.918922302869873e-06, "loss": 0.6756, "step": 6146 }, { "epoch": 0.39, "grad_norm": 1.7210177002996814, "learning_rate": 6.917965082685418e-06, "loss": 0.697, "step": 6147 }, { "epoch": 0.39, "grad_norm": 1.678971595595874, "learning_rate": 6.917007780070856e-06, "loss": 0.7303, "step": 6148 }, { "epoch": 0.39, "grad_norm": 1.465967153535379, "learning_rate": 6.9160503950673276e-06, "loss": 0.6419, "step": 6149 }, { "epoch": 0.39, "grad_norm": 1.6461286670939452, "learning_rate": 6.915092927715979e-06, "loss": 0.6957, "step": 6150 }, { "epoch": 0.39, "grad_norm": 1.5915690578275323, "learning_rate": 6.914135378057959e-06, "loss": 0.6105, "step": 6151 }, { "epoch": 0.39, "grad_norm": 1.1500533612317962, "learning_rate": 6.9131777461344255e-06, "loss": 0.6932, "step": 6152 }, { "epoch": 0.39, "grad_norm": 1.7060453593926987, "learning_rate": 6.912220031986531e-06, "loss": 0.8072, "step": 6153 }, { "epoch": 0.39, "grad_norm": 1.5452569974210075, "learning_rate": 6.911262235655437e-06, "loss": 0.6676, "step": 6154 }, { "epoch": 0.39, "grad_norm": 1.5950329141321966, "learning_rate": 6.910304357182308e-06, "loss": 0.6773, "step": 6155 }, { "epoch": 0.39, "grad_norm": 1.9721956029733703, "learning_rate": 6.909346396608313e-06, "loss": 0.7815, "step": 6156 }, { "epoch": 0.39, "grad_norm": 1.5083801632515808, "learning_rate": 6.908388353974622e-06, "loss": 0.621, "step": 6157 }, { "epoch": 0.39, "grad_norm": 1.5349801578318865, "learning_rate": 6.907430229322409e-06, "loss": 0.6985, "step": 6158 }, { "epoch": 0.39, "grad_norm": 1.641643256957307, "learning_rate": 6.906472022692854e-06, "loss": 0.7277, "step": 6159 }, { "epoch": 0.39, "grad_norm": 1.0251602922527385, "learning_rate": 6.9055137341271365e-06, "loss": 0.7582, "step": 6160 }, { "epoch": 0.39, "grad_norm": 1.6416215395680873, "learning_rate": 6.904555363666443e-06, "loss": 0.7192, "step": 6161 }, { "epoch": 0.39, "grad_norm": 1.5394853025459907, "learning_rate": 6.903596911351962e-06, "loss": 0.7016, "step": 6162 }, { "epoch": 0.39, "grad_norm": 1.639708293589135, "learning_rate": 6.902638377224886e-06, "loss": 0.6678, "step": 6163 }, { "epoch": 0.39, "grad_norm": 1.3868566275616174, "learning_rate": 6.901679761326409e-06, "loss": 0.6783, "step": 6164 }, { "epoch": 0.39, "grad_norm": 1.6623547423019218, "learning_rate": 6.900721063697733e-06, "loss": 0.6934, "step": 6165 }, { "epoch": 0.39, "grad_norm": 1.67021029752093, "learning_rate": 6.899762284380059e-06, "loss": 0.7679, "step": 6166 }, { "epoch": 0.39, "grad_norm": 1.7669559112489712, "learning_rate": 6.898803423414595e-06, "loss": 0.7432, "step": 6167 }, { "epoch": 0.39, "grad_norm": 1.1996190951001886, "learning_rate": 6.89784448084255e-06, "loss": 0.6332, "step": 6168 }, { "epoch": 0.39, "grad_norm": 1.6127212221431682, "learning_rate": 6.896885456705137e-06, "loss": 0.6927, "step": 6169 }, { "epoch": 0.39, "grad_norm": 1.6069490354212084, "learning_rate": 6.895926351043573e-06, "loss": 0.7202, "step": 6170 }, { "epoch": 0.39, "grad_norm": 1.503126299819558, "learning_rate": 6.894967163899077e-06, "loss": 0.6316, "step": 6171 }, { "epoch": 0.4, "grad_norm": 1.004846201280876, "learning_rate": 6.894007895312875e-06, "loss": 0.6554, "step": 6172 }, { "epoch": 0.4, "grad_norm": 1.6497855842272975, "learning_rate": 6.893048545326193e-06, "loss": 0.7509, "step": 6173 }, { "epoch": 0.4, "grad_norm": 1.661075937115069, "learning_rate": 6.892089113980262e-06, "loss": 0.8347, "step": 6174 }, { "epoch": 0.4, "grad_norm": 1.9019856448976715, "learning_rate": 6.891129601316319e-06, "loss": 0.7152, "step": 6175 }, { "epoch": 0.4, "grad_norm": 1.7148624571432876, "learning_rate": 6.890170007375598e-06, "loss": 0.7102, "step": 6176 }, { "epoch": 0.4, "grad_norm": 1.6256076670789998, "learning_rate": 6.889210332199343e-06, "loss": 0.6168, "step": 6177 }, { "epoch": 0.4, "grad_norm": 1.4249717015619379, "learning_rate": 6.888250575828797e-06, "loss": 0.6826, "step": 6178 }, { "epoch": 0.4, "grad_norm": 1.5945312246821703, "learning_rate": 6.887290738305208e-06, "loss": 0.7447, "step": 6179 }, { "epoch": 0.4, "grad_norm": 1.7998706271335818, "learning_rate": 6.88633081966983e-06, "loss": 0.7248, "step": 6180 }, { "epoch": 0.4, "grad_norm": 1.460434457391608, "learning_rate": 6.885370819963917e-06, "loss": 0.671, "step": 6181 }, { "epoch": 0.4, "grad_norm": 1.057974708677261, "learning_rate": 6.884410739228727e-06, "loss": 0.624, "step": 6182 }, { "epoch": 0.4, "grad_norm": 1.6948079802824982, "learning_rate": 6.883450577505524e-06, "loss": 0.7401, "step": 6183 }, { "epoch": 0.4, "grad_norm": 1.618962634726243, "learning_rate": 6.882490334835572e-06, "loss": 0.6991, "step": 6184 }, { "epoch": 0.4, "grad_norm": 1.8971877915598114, "learning_rate": 6.881530011260142e-06, "loss": 0.5816, "step": 6185 }, { "epoch": 0.4, "grad_norm": 1.095987493058494, "learning_rate": 6.880569606820504e-06, "loss": 0.7378, "step": 6186 }, { "epoch": 0.4, "grad_norm": 1.518376319183017, "learning_rate": 6.879609121557938e-06, "loss": 0.6138, "step": 6187 }, { "epoch": 0.4, "grad_norm": 1.042517069689605, "learning_rate": 6.878648555513721e-06, "loss": 0.5991, "step": 6188 }, { "epoch": 0.4, "grad_norm": 1.517833975205777, "learning_rate": 6.877687908729137e-06, "loss": 0.706, "step": 6189 }, { "epoch": 0.4, "grad_norm": 1.5907340313980691, "learning_rate": 6.876727181245472e-06, "loss": 0.6721, "step": 6190 }, { "epoch": 0.4, "grad_norm": 1.707293847005642, "learning_rate": 6.875766373104016e-06, "loss": 0.6647, "step": 6191 }, { "epoch": 0.4, "grad_norm": 1.4367628844623261, "learning_rate": 6.874805484346062e-06, "loss": 0.6415, "step": 6192 }, { "epoch": 0.4, "grad_norm": 1.7014139960997536, "learning_rate": 6.873844515012909e-06, "loss": 0.6892, "step": 6193 }, { "epoch": 0.4, "grad_norm": 1.6437653251057378, "learning_rate": 6.872883465145855e-06, "loss": 0.7053, "step": 6194 }, { "epoch": 0.4, "grad_norm": 1.431421302291057, "learning_rate": 6.871922334786206e-06, "loss": 0.6584, "step": 6195 }, { "epoch": 0.4, "grad_norm": 1.4155117758681595, "learning_rate": 6.870961123975269e-06, "loss": 0.6957, "step": 6196 }, { "epoch": 0.4, "grad_norm": 1.2069510521090732, "learning_rate": 6.8699998327543545e-06, "loss": 0.7685, "step": 6197 }, { "epoch": 0.4, "grad_norm": 1.616289256407955, "learning_rate": 6.869038461164776e-06, "loss": 0.6909, "step": 6198 }, { "epoch": 0.4, "grad_norm": 1.3285508613007075, "learning_rate": 6.868077009247852e-06, "loss": 0.6255, "step": 6199 }, { "epoch": 0.4, "grad_norm": 1.8868287331230165, "learning_rate": 6.867115477044902e-06, "loss": 0.747, "step": 6200 }, { "epoch": 0.4, "grad_norm": 1.4765969995669352, "learning_rate": 6.866153864597254e-06, "loss": 0.71, "step": 6201 }, { "epoch": 0.4, "grad_norm": 1.1540849109697289, "learning_rate": 6.865192171946234e-06, "loss": 0.6441, "step": 6202 }, { "epoch": 0.4, "grad_norm": 1.3559899149868468, "learning_rate": 6.864230399133172e-06, "loss": 0.7485, "step": 6203 }, { "epoch": 0.4, "grad_norm": 1.4658707241929287, "learning_rate": 6.863268546199408e-06, "loss": 0.7166, "step": 6204 }, { "epoch": 0.4, "grad_norm": 1.458182971556563, "learning_rate": 6.862306613186275e-06, "loss": 0.6229, "step": 6205 }, { "epoch": 0.4, "grad_norm": 1.8743830586460124, "learning_rate": 6.861344600135118e-06, "loss": 0.6604, "step": 6206 }, { "epoch": 0.4, "grad_norm": 1.5984286143868798, "learning_rate": 6.86038250708728e-06, "loss": 0.7483, "step": 6207 }, { "epoch": 0.4, "grad_norm": 1.5944233757617003, "learning_rate": 6.859420334084111e-06, "loss": 0.6741, "step": 6208 }, { "epoch": 0.4, "grad_norm": 1.469228014828832, "learning_rate": 6.858458081166964e-06, "loss": 0.7338, "step": 6209 }, { "epoch": 0.4, "grad_norm": 1.304891342399288, "learning_rate": 6.857495748377193e-06, "loss": 0.6744, "step": 6210 }, { "epoch": 0.4, "grad_norm": 1.1312641836740265, "learning_rate": 6.856533335756159e-06, "loss": 0.5791, "step": 6211 }, { "epoch": 0.4, "grad_norm": 4.541723322626886, "learning_rate": 6.855570843345223e-06, "loss": 0.6839, "step": 6212 }, { "epoch": 0.4, "grad_norm": 1.6108188793252498, "learning_rate": 6.854608271185752e-06, "loss": 0.7111, "step": 6213 }, { "epoch": 0.4, "grad_norm": 1.427299220767811, "learning_rate": 6.853645619319114e-06, "loss": 0.6651, "step": 6214 }, { "epoch": 0.4, "grad_norm": 1.569419482321986, "learning_rate": 6.852682887786681e-06, "loss": 0.7137, "step": 6215 }, { "epoch": 0.4, "grad_norm": 1.7276608729830476, "learning_rate": 6.851720076629832e-06, "loss": 0.7406, "step": 6216 }, { "epoch": 0.4, "grad_norm": 1.624084413216096, "learning_rate": 6.850757185889945e-06, "loss": 0.7001, "step": 6217 }, { "epoch": 0.4, "grad_norm": 1.1663811623764884, "learning_rate": 6.849794215608403e-06, "loss": 0.7718, "step": 6218 }, { "epoch": 0.4, "grad_norm": 1.482624998231743, "learning_rate": 6.848831165826591e-06, "loss": 0.636, "step": 6219 }, { "epoch": 0.4, "grad_norm": 1.4815384901124125, "learning_rate": 6.847868036585903e-06, "loss": 0.7091, "step": 6220 }, { "epoch": 0.4, "grad_norm": 1.3206498436049983, "learning_rate": 6.846904827927728e-06, "loss": 0.6218, "step": 6221 }, { "epoch": 0.4, "grad_norm": 1.1312670929921689, "learning_rate": 6.845941539893465e-06, "loss": 0.6477, "step": 6222 }, { "epoch": 0.4, "grad_norm": 1.6817371566504848, "learning_rate": 6.844978172524514e-06, "loss": 0.7142, "step": 6223 }, { "epoch": 0.4, "grad_norm": 2.5204635527207175, "learning_rate": 6.844014725862277e-06, "loss": 0.6895, "step": 6224 }, { "epoch": 0.4, "grad_norm": 2.5210922950292485, "learning_rate": 6.843051199948162e-06, "loss": 0.7408, "step": 6225 }, { "epoch": 0.4, "grad_norm": 1.6037628226942808, "learning_rate": 6.84208759482358e-06, "loss": 0.6849, "step": 6226 }, { "epoch": 0.4, "grad_norm": 1.5695603610635802, "learning_rate": 6.841123910529943e-06, "loss": 0.7472, "step": 6227 }, { "epoch": 0.4, "grad_norm": 1.4686044752402092, "learning_rate": 6.840160147108669e-06, "loss": 0.7145, "step": 6228 }, { "epoch": 0.4, "grad_norm": 1.5751437288479184, "learning_rate": 6.839196304601179e-06, "loss": 0.6776, "step": 6229 }, { "epoch": 0.4, "grad_norm": 1.6455610407329997, "learning_rate": 6.838232383048896e-06, "loss": 0.7408, "step": 6230 }, { "epoch": 0.4, "grad_norm": 1.4865610599546197, "learning_rate": 6.837268382493248e-06, "loss": 0.6967, "step": 6231 }, { "epoch": 0.4, "grad_norm": 1.5453591021325128, "learning_rate": 6.836304302975667e-06, "loss": 0.8002, "step": 6232 }, { "epoch": 0.4, "grad_norm": 1.511385861481462, "learning_rate": 6.835340144537584e-06, "loss": 0.6563, "step": 6233 }, { "epoch": 0.4, "grad_norm": 1.692308269309093, "learning_rate": 6.83437590722044e-06, "loss": 0.8043, "step": 6234 }, { "epoch": 0.4, "grad_norm": 1.5770751210871001, "learning_rate": 6.833411591065673e-06, "loss": 0.6616, "step": 6235 }, { "epoch": 0.4, "grad_norm": 2.821498289421087, "learning_rate": 6.832447196114728e-06, "loss": 0.6732, "step": 6236 }, { "epoch": 0.4, "grad_norm": 1.5796913222047424, "learning_rate": 6.831482722409053e-06, "loss": 0.8398, "step": 6237 }, { "epoch": 0.4, "grad_norm": 1.5242854403098967, "learning_rate": 6.830518169990098e-06, "loss": 0.7828, "step": 6238 }, { "epoch": 0.4, "grad_norm": 2.0937712184415727, "learning_rate": 6.829553538899321e-06, "loss": 0.6145, "step": 6239 }, { "epoch": 0.4, "grad_norm": 2.7378757990708444, "learning_rate": 6.828588829178175e-06, "loss": 0.7632, "step": 6240 }, { "epoch": 0.4, "grad_norm": 1.696527928916628, "learning_rate": 6.8276240408681245e-06, "loss": 0.6758, "step": 6241 }, { "epoch": 0.4, "grad_norm": 1.6509542340446617, "learning_rate": 6.826659174010635e-06, "loss": 0.6669, "step": 6242 }, { "epoch": 0.4, "grad_norm": 1.6825100300861215, "learning_rate": 6.825694228647169e-06, "loss": 0.7952, "step": 6243 }, { "epoch": 0.4, "grad_norm": 1.4078673928910348, "learning_rate": 6.824729204819203e-06, "loss": 0.6686, "step": 6244 }, { "epoch": 0.4, "grad_norm": 1.5090645639623639, "learning_rate": 6.823764102568211e-06, "loss": 0.6502, "step": 6245 }, { "epoch": 0.4, "grad_norm": 1.4307406931768796, "learning_rate": 6.82279892193567e-06, "loss": 0.7655, "step": 6246 }, { "epoch": 0.4, "grad_norm": 2.330375978327345, "learning_rate": 6.82183366296306e-06, "loss": 0.6885, "step": 6247 }, { "epoch": 0.4, "grad_norm": 1.5407307252770481, "learning_rate": 6.820868325691867e-06, "loss": 0.6865, "step": 6248 }, { "epoch": 0.4, "grad_norm": 1.6665342596595, "learning_rate": 6.819902910163582e-06, "loss": 0.6687, "step": 6249 }, { "epoch": 0.4, "grad_norm": 1.5504353287063568, "learning_rate": 6.818937416419693e-06, "loss": 0.7019, "step": 6250 }, { "epoch": 0.4, "grad_norm": 1.684572318944192, "learning_rate": 6.817971844501695e-06, "loss": 0.6529, "step": 6251 }, { "epoch": 0.4, "grad_norm": 1.1299044415748882, "learning_rate": 6.817006194451088e-06, "loss": 0.5872, "step": 6252 }, { "epoch": 0.4, "grad_norm": 1.5828608563969306, "learning_rate": 6.8160404663093725e-06, "loss": 0.767, "step": 6253 }, { "epoch": 0.4, "grad_norm": 1.4510531728435188, "learning_rate": 6.815074660118055e-06, "loss": 0.7133, "step": 6254 }, { "epoch": 0.4, "grad_norm": 2.8524521210504497, "learning_rate": 6.814108775918642e-06, "loss": 0.6957, "step": 6255 }, { "epoch": 0.4, "grad_norm": 1.6021178206657514, "learning_rate": 6.813142813752645e-06, "loss": 0.6359, "step": 6256 }, { "epoch": 0.4, "grad_norm": 1.4876278243937195, "learning_rate": 6.812176773661579e-06, "loss": 0.6397, "step": 6257 }, { "epoch": 0.4, "grad_norm": 1.4720816897480449, "learning_rate": 6.8112106556869635e-06, "loss": 0.7015, "step": 6258 }, { "epoch": 0.4, "grad_norm": 1.5868724504782825, "learning_rate": 6.810244459870322e-06, "loss": 0.6742, "step": 6259 }, { "epoch": 0.4, "grad_norm": 1.586349285695322, "learning_rate": 6.809278186253177e-06, "loss": 0.673, "step": 6260 }, { "epoch": 0.4, "grad_norm": 1.6603269908832374, "learning_rate": 6.808311834877057e-06, "loss": 0.671, "step": 6261 }, { "epoch": 0.4, "grad_norm": 1.6808800286872052, "learning_rate": 6.807345405783494e-06, "loss": 0.6342, "step": 6262 }, { "epoch": 0.4, "grad_norm": 1.9532565435847826, "learning_rate": 6.806378899014023e-06, "loss": 0.6708, "step": 6263 }, { "epoch": 0.4, "grad_norm": 1.4510759249817546, "learning_rate": 6.805412314610181e-06, "loss": 0.6461, "step": 6264 }, { "epoch": 0.4, "grad_norm": 1.6241898949280973, "learning_rate": 6.804445652613514e-06, "loss": 0.7336, "step": 6265 }, { "epoch": 0.4, "grad_norm": 1.4498991688226661, "learning_rate": 6.803478913065563e-06, "loss": 0.6846, "step": 6266 }, { "epoch": 0.4, "grad_norm": 1.4812774299909128, "learning_rate": 6.802512096007879e-06, "loss": 0.6344, "step": 6267 }, { "epoch": 0.4, "grad_norm": 1.5724118594565022, "learning_rate": 6.801545201482012e-06, "loss": 0.6755, "step": 6268 }, { "epoch": 0.4, "grad_norm": 1.8097397977876715, "learning_rate": 6.800578229529519e-06, "loss": 0.6149, "step": 6269 }, { "epoch": 0.4, "grad_norm": 1.5713756321360208, "learning_rate": 6.799611180191956e-06, "loss": 0.6402, "step": 6270 }, { "epoch": 0.4, "grad_norm": 1.4606451614933695, "learning_rate": 6.798644053510886e-06, "loss": 0.6257, "step": 6271 }, { "epoch": 0.4, "grad_norm": 1.7520380415164978, "learning_rate": 6.797676849527875e-06, "loss": 0.7576, "step": 6272 }, { "epoch": 0.4, "grad_norm": 1.6942403603112208, "learning_rate": 6.796709568284488e-06, "loss": 0.653, "step": 6273 }, { "epoch": 0.4, "grad_norm": 1.6502186790539008, "learning_rate": 6.795742209822302e-06, "loss": 0.6509, "step": 6274 }, { "epoch": 0.4, "grad_norm": 1.7116349245823175, "learning_rate": 6.794774774182887e-06, "loss": 0.6415, "step": 6275 }, { "epoch": 0.4, "grad_norm": 1.6372758639279184, "learning_rate": 6.793807261407825e-06, "loss": 0.5543, "step": 6276 }, { "epoch": 0.4, "grad_norm": 1.814320467580634, "learning_rate": 6.792839671538696e-06, "loss": 0.7808, "step": 6277 }, { "epoch": 0.4, "grad_norm": 1.4801816967377879, "learning_rate": 6.791872004617086e-06, "loss": 0.7142, "step": 6278 }, { "epoch": 0.4, "grad_norm": 1.617355488418564, "learning_rate": 6.790904260684581e-06, "loss": 0.6395, "step": 6279 }, { "epoch": 0.4, "grad_norm": 1.636737932921668, "learning_rate": 6.789936439782774e-06, "loss": 0.6918, "step": 6280 }, { "epoch": 0.4, "grad_norm": 1.609968526730067, "learning_rate": 6.788968541953262e-06, "loss": 0.7176, "step": 6281 }, { "epoch": 0.4, "grad_norm": 1.120213388376686, "learning_rate": 6.7880005672376394e-06, "loss": 0.7703, "step": 6282 }, { "epoch": 0.4, "grad_norm": 2.2427740373803555, "learning_rate": 6.787032515677509e-06, "loss": 0.7428, "step": 6283 }, { "epoch": 0.4, "grad_norm": 1.4424796001119367, "learning_rate": 6.786064387314477e-06, "loss": 0.7305, "step": 6284 }, { "epoch": 0.4, "grad_norm": 1.5194825455458727, "learning_rate": 6.785096182190152e-06, "loss": 0.7006, "step": 6285 }, { "epoch": 0.4, "grad_norm": 1.7225275694780837, "learning_rate": 6.7841279003461425e-06, "loss": 0.6837, "step": 6286 }, { "epoch": 0.4, "grad_norm": 1.5305851752540283, "learning_rate": 6.783159541824065e-06, "loss": 0.7437, "step": 6287 }, { "epoch": 0.4, "grad_norm": 1.2548347413429595, "learning_rate": 6.782191106665536e-06, "loss": 0.6466, "step": 6288 }, { "epoch": 0.4, "grad_norm": 1.422995801485545, "learning_rate": 6.781222594912182e-06, "loss": 0.6941, "step": 6289 }, { "epoch": 0.4, "grad_norm": 1.6269800402180152, "learning_rate": 6.780254006605621e-06, "loss": 0.7128, "step": 6290 }, { "epoch": 0.4, "grad_norm": 1.4901431310571291, "learning_rate": 6.779285341787484e-06, "loss": 0.6925, "step": 6291 }, { "epoch": 0.4, "grad_norm": 1.5149863561128327, "learning_rate": 6.778316600499401e-06, "loss": 0.619, "step": 6292 }, { "epoch": 0.4, "grad_norm": 1.6503600068460915, "learning_rate": 6.7773477827830085e-06, "loss": 0.7609, "step": 6293 }, { "epoch": 0.4, "grad_norm": 1.4423038954342042, "learning_rate": 6.776378888679942e-06, "loss": 0.7627, "step": 6294 }, { "epoch": 0.4, "grad_norm": 1.8464387692496174, "learning_rate": 6.775409918231843e-06, "loss": 0.7543, "step": 6295 }, { "epoch": 0.4, "grad_norm": 1.675165720924696, "learning_rate": 6.774440871480359e-06, "loss": 0.7126, "step": 6296 }, { "epoch": 0.4, "grad_norm": 1.4762783509924255, "learning_rate": 6.773471748467133e-06, "loss": 0.6824, "step": 6297 }, { "epoch": 0.4, "grad_norm": 1.7619012153279248, "learning_rate": 6.772502549233819e-06, "loss": 0.8049, "step": 6298 }, { "epoch": 0.4, "grad_norm": 1.4580154285359033, "learning_rate": 6.771533273822069e-06, "loss": 0.715, "step": 6299 }, { "epoch": 0.4, "grad_norm": 1.7335356265723827, "learning_rate": 6.770563922273541e-06, "loss": 0.7362, "step": 6300 }, { "epoch": 0.4, "grad_norm": 1.5963421159723785, "learning_rate": 6.769594494629898e-06, "loss": 0.7188, "step": 6301 }, { "epoch": 0.4, "grad_norm": 1.1010583609514015, "learning_rate": 6.7686249909328e-06, "loss": 0.6498, "step": 6302 }, { "epoch": 0.4, "grad_norm": 1.8214785802872457, "learning_rate": 6.767655411223917e-06, "loss": 0.6646, "step": 6303 }, { "epoch": 0.4, "grad_norm": 1.3956706011355775, "learning_rate": 6.766685755544919e-06, "loss": 0.6501, "step": 6304 }, { "epoch": 0.4, "grad_norm": 1.5254182128470037, "learning_rate": 6.76571602393748e-06, "loss": 0.7082, "step": 6305 }, { "epoch": 0.4, "grad_norm": 1.7720176930477987, "learning_rate": 6.764746216443277e-06, "loss": 0.699, "step": 6306 }, { "epoch": 0.4, "grad_norm": 1.7802292299726024, "learning_rate": 6.7637763331039885e-06, "loss": 0.7303, "step": 6307 }, { "epoch": 0.4, "grad_norm": 1.7635090192265834, "learning_rate": 6.7628063739612985e-06, "loss": 0.6567, "step": 6308 }, { "epoch": 0.4, "grad_norm": 1.5838629848786312, "learning_rate": 6.761836339056896e-06, "loss": 0.7618, "step": 6309 }, { "epoch": 0.4, "grad_norm": 1.7428857441464278, "learning_rate": 6.76086622843247e-06, "loss": 0.7214, "step": 6310 }, { "epoch": 0.4, "grad_norm": 1.62926813989892, "learning_rate": 6.759896042129713e-06, "loss": 0.7013, "step": 6311 }, { "epoch": 0.4, "grad_norm": 1.4724696612533852, "learning_rate": 6.758925780190322e-06, "loss": 0.7133, "step": 6312 }, { "epoch": 0.4, "grad_norm": 1.3833011721498196, "learning_rate": 6.757955442655998e-06, "loss": 0.7552, "step": 6313 }, { "epoch": 0.4, "grad_norm": 1.5434370486856615, "learning_rate": 6.756985029568443e-06, "loss": 0.7716, "step": 6314 }, { "epoch": 0.4, "grad_norm": 1.4583215397608202, "learning_rate": 6.756014540969362e-06, "loss": 0.7964, "step": 6315 }, { "epoch": 0.4, "grad_norm": 1.4961087887166573, "learning_rate": 6.755043976900467e-06, "loss": 0.6797, "step": 6316 }, { "epoch": 0.4, "grad_norm": 1.3954937017542266, "learning_rate": 6.754073337403469e-06, "loss": 0.7396, "step": 6317 }, { "epoch": 0.4, "grad_norm": 1.5911327211609203, "learning_rate": 6.753102622520087e-06, "loss": 0.6611, "step": 6318 }, { "epoch": 0.4, "grad_norm": 1.4057921748944981, "learning_rate": 6.752131832292036e-06, "loss": 0.6647, "step": 6319 }, { "epoch": 0.4, "grad_norm": 1.6430660228449705, "learning_rate": 6.751160966761041e-06, "loss": 0.7157, "step": 6320 }, { "epoch": 0.4, "grad_norm": 1.3043192741318828, "learning_rate": 6.750190025968827e-06, "loss": 0.681, "step": 6321 }, { "epoch": 0.4, "grad_norm": 1.638371196782071, "learning_rate": 6.749219009957122e-06, "loss": 0.657, "step": 6322 }, { "epoch": 0.4, "grad_norm": 1.5035398482328672, "learning_rate": 6.748247918767662e-06, "loss": 0.7537, "step": 6323 }, { "epoch": 0.4, "grad_norm": 1.4445855453334082, "learning_rate": 6.74727675244218e-06, "loss": 0.6696, "step": 6324 }, { "epoch": 0.4, "grad_norm": 1.5407085764155681, "learning_rate": 6.746305511022414e-06, "loss": 0.6895, "step": 6325 }, { "epoch": 0.4, "grad_norm": 1.6401757114738011, "learning_rate": 6.745334194550106e-06, "loss": 0.6946, "step": 6326 }, { "epoch": 0.4, "grad_norm": 1.0359300965427238, "learning_rate": 6.744362803067003e-06, "loss": 0.5515, "step": 6327 }, { "epoch": 0.41, "grad_norm": 1.3297410738384012, "learning_rate": 6.7433913366148515e-06, "loss": 0.6365, "step": 6328 }, { "epoch": 0.41, "grad_norm": 1.3474487934282586, "learning_rate": 6.742419795235403e-06, "loss": 0.6129, "step": 6329 }, { "epoch": 0.41, "grad_norm": 1.4870721892006018, "learning_rate": 6.741448178970413e-06, "loss": 0.6871, "step": 6330 }, { "epoch": 0.41, "grad_norm": 3.0183914175211055, "learning_rate": 6.74047648786164e-06, "loss": 0.6972, "step": 6331 }, { "epoch": 0.41, "grad_norm": 1.5866513388935148, "learning_rate": 6.739504721950845e-06, "loss": 0.7355, "step": 6332 }, { "epoch": 0.41, "grad_norm": 1.4677753757907142, "learning_rate": 6.738532881279794e-06, "loss": 0.6728, "step": 6333 }, { "epoch": 0.41, "grad_norm": 1.6357620735353469, "learning_rate": 6.7375609658902505e-06, "loss": 0.6723, "step": 6334 }, { "epoch": 0.41, "grad_norm": 1.6320130962423118, "learning_rate": 6.73658897582399e-06, "loss": 0.6472, "step": 6335 }, { "epoch": 0.41, "grad_norm": 1.6228310061746438, "learning_rate": 6.735616911122782e-06, "loss": 0.6668, "step": 6336 }, { "epoch": 0.41, "grad_norm": 1.8462678635034753, "learning_rate": 6.734644771828407e-06, "loss": 0.6828, "step": 6337 }, { "epoch": 0.41, "grad_norm": 1.6640916477667989, "learning_rate": 6.733672557982645e-06, "loss": 0.7725, "step": 6338 }, { "epoch": 0.41, "grad_norm": 1.5831429938271178, "learning_rate": 6.7327002696272795e-06, "loss": 0.711, "step": 6339 }, { "epoch": 0.41, "grad_norm": 2.174709430335132, "learning_rate": 6.7317279068040965e-06, "loss": 0.6549, "step": 6340 }, { "epoch": 0.41, "grad_norm": 1.6653515270567762, "learning_rate": 6.730755469554888e-06, "loss": 0.683, "step": 6341 }, { "epoch": 0.41, "grad_norm": 1.606021014866538, "learning_rate": 6.729782957921446e-06, "loss": 0.7204, "step": 6342 }, { "epoch": 0.41, "grad_norm": 1.487748770697338, "learning_rate": 6.728810371945567e-06, "loss": 0.6901, "step": 6343 }, { "epoch": 0.41, "grad_norm": 1.5462903878060752, "learning_rate": 6.72783771166905e-06, "loss": 0.7854, "step": 6344 }, { "epoch": 0.41, "grad_norm": 1.8929359398835026, "learning_rate": 6.7268649771337e-06, "loss": 0.7563, "step": 6345 }, { "epoch": 0.41, "grad_norm": 1.525338844015877, "learning_rate": 6.725892168381323e-06, "loss": 0.7312, "step": 6346 }, { "epoch": 0.41, "grad_norm": 1.4663365606450192, "learning_rate": 6.724919285453726e-06, "loss": 0.611, "step": 6347 }, { "epoch": 0.41, "grad_norm": 1.6486180059432725, "learning_rate": 6.723946328392722e-06, "loss": 0.7339, "step": 6348 }, { "epoch": 0.41, "grad_norm": 1.5005049060118854, "learning_rate": 6.722973297240128e-06, "loss": 0.7032, "step": 6349 }, { "epoch": 0.41, "grad_norm": 1.860142168408163, "learning_rate": 6.722000192037761e-06, "loss": 0.6566, "step": 6350 }, { "epoch": 0.41, "grad_norm": 1.6330903092673292, "learning_rate": 6.721027012827444e-06, "loss": 0.6577, "step": 6351 }, { "epoch": 0.41, "grad_norm": 1.9648616359898508, "learning_rate": 6.720053759651005e-06, "loss": 0.7121, "step": 6352 }, { "epoch": 0.41, "grad_norm": 1.6168186203600232, "learning_rate": 6.719080432550269e-06, "loss": 0.7271, "step": 6353 }, { "epoch": 0.41, "grad_norm": 1.2860306102371661, "learning_rate": 6.718107031567067e-06, "loss": 0.6505, "step": 6354 }, { "epoch": 0.41, "grad_norm": 1.8997687562273908, "learning_rate": 6.7171335567432365e-06, "loss": 0.6375, "step": 6355 }, { "epoch": 0.41, "grad_norm": 1.8181682878982257, "learning_rate": 6.716160008120613e-06, "loss": 0.6554, "step": 6356 }, { "epoch": 0.41, "grad_norm": 1.5178621599950122, "learning_rate": 6.7151863857410375e-06, "loss": 0.5612, "step": 6357 }, { "epoch": 0.41, "grad_norm": 1.6324936872390219, "learning_rate": 6.714212689646355e-06, "loss": 0.7296, "step": 6358 }, { "epoch": 0.41, "grad_norm": 1.6597600165915731, "learning_rate": 6.713238919878415e-06, "loss": 0.6319, "step": 6359 }, { "epoch": 0.41, "grad_norm": 1.0686570175096626, "learning_rate": 6.712265076479067e-06, "loss": 0.7455, "step": 6360 }, { "epoch": 0.41, "grad_norm": 1.6531671857918198, "learning_rate": 6.711291159490162e-06, "loss": 0.8376, "step": 6361 }, { "epoch": 0.41, "grad_norm": 2.5860186436476265, "learning_rate": 6.71031716895356e-06, "loss": 0.7361, "step": 6362 }, { "epoch": 0.41, "grad_norm": 1.6209180410811221, "learning_rate": 6.709343104911119e-06, "loss": 0.6818, "step": 6363 }, { "epoch": 0.41, "grad_norm": 2.6774493082610227, "learning_rate": 6.708368967404704e-06, "loss": 0.6644, "step": 6364 }, { "epoch": 0.41, "grad_norm": 1.1920814902369887, "learning_rate": 6.70739475647618e-06, "loss": 0.6637, "step": 6365 }, { "epoch": 0.41, "grad_norm": 1.6021225226294895, "learning_rate": 6.706420472167417e-06, "loss": 0.6981, "step": 6366 }, { "epoch": 0.41, "grad_norm": 1.5476615702992738, "learning_rate": 6.705446114520289e-06, "loss": 0.7656, "step": 6367 }, { "epoch": 0.41, "grad_norm": 1.816518451429053, "learning_rate": 6.704471683576669e-06, "loss": 0.7009, "step": 6368 }, { "epoch": 0.41, "grad_norm": 1.6373041476037402, "learning_rate": 6.7034971793784385e-06, "loss": 0.6823, "step": 6369 }, { "epoch": 0.41, "grad_norm": 2.0114337286848953, "learning_rate": 6.702522601967479e-06, "loss": 0.7418, "step": 6370 }, { "epoch": 0.41, "grad_norm": 1.2176688054630347, "learning_rate": 6.701547951385675e-06, "loss": 0.7024, "step": 6371 }, { "epoch": 0.41, "grad_norm": 1.4401705729777812, "learning_rate": 6.700573227674916e-06, "loss": 0.7287, "step": 6372 }, { "epoch": 0.41, "grad_norm": 1.6562589669167775, "learning_rate": 6.699598430877092e-06, "loss": 0.6636, "step": 6373 }, { "epoch": 0.41, "grad_norm": 1.5466966081580187, "learning_rate": 6.6986235610341e-06, "loss": 0.6686, "step": 6374 }, { "epoch": 0.41, "grad_norm": 1.6073002641457763, "learning_rate": 6.697648618187836e-06, "loss": 0.7054, "step": 6375 }, { "epoch": 0.41, "grad_norm": 1.7190467153176063, "learning_rate": 6.696673602380203e-06, "loss": 0.7487, "step": 6376 }, { "epoch": 0.41, "grad_norm": 1.3988491158357614, "learning_rate": 6.6956985136531015e-06, "loss": 0.6081, "step": 6377 }, { "epoch": 0.41, "grad_norm": 1.469452382592368, "learning_rate": 6.694723352048442e-06, "loss": 0.6462, "step": 6378 }, { "epoch": 0.41, "grad_norm": 1.481621521529079, "learning_rate": 6.693748117608134e-06, "loss": 0.7206, "step": 6379 }, { "epoch": 0.41, "grad_norm": 1.0640482635941615, "learning_rate": 6.69277281037409e-06, "loss": 0.6234, "step": 6380 }, { "epoch": 0.41, "grad_norm": 1.5219147862512108, "learning_rate": 6.69179743038823e-06, "loss": 0.6281, "step": 6381 }, { "epoch": 0.41, "grad_norm": 1.4017691741841645, "learning_rate": 6.69082197769247e-06, "loss": 0.6687, "step": 6382 }, { "epoch": 0.41, "grad_norm": 1.4145058698892328, "learning_rate": 6.6898464523287354e-06, "loss": 0.6247, "step": 6383 }, { "epoch": 0.41, "grad_norm": 1.5610778392589761, "learning_rate": 6.68887085433895e-06, "loss": 0.6521, "step": 6384 }, { "epoch": 0.41, "grad_norm": 1.432432823180801, "learning_rate": 6.687895183765043e-06, "loss": 0.6984, "step": 6385 }, { "epoch": 0.41, "grad_norm": 1.4104023613036105, "learning_rate": 6.686919440648949e-06, "loss": 0.7015, "step": 6386 }, { "epoch": 0.41, "grad_norm": 1.2243869150019824, "learning_rate": 6.685943625032602e-06, "loss": 0.6415, "step": 6387 }, { "epoch": 0.41, "grad_norm": 1.6657214226925023, "learning_rate": 6.684967736957941e-06, "loss": 0.6861, "step": 6388 }, { "epoch": 0.41, "grad_norm": 0.9901312573752251, "learning_rate": 6.683991776466907e-06, "loss": 0.7123, "step": 6389 }, { "epoch": 0.41, "grad_norm": 2.0088727442423413, "learning_rate": 6.683015743601445e-06, "loss": 0.716, "step": 6390 }, { "epoch": 0.41, "grad_norm": 1.7132406265730702, "learning_rate": 6.682039638403503e-06, "loss": 0.6917, "step": 6391 }, { "epoch": 0.41, "grad_norm": 1.4787026241433872, "learning_rate": 6.681063460915033e-06, "loss": 0.6971, "step": 6392 }, { "epoch": 0.41, "grad_norm": 1.5279055998966276, "learning_rate": 6.6800872111779854e-06, "loss": 0.574, "step": 6393 }, { "epoch": 0.41, "grad_norm": 1.4707341075572617, "learning_rate": 6.679110889234322e-06, "loss": 0.6603, "step": 6394 }, { "epoch": 0.41, "grad_norm": 1.5897130342975383, "learning_rate": 6.678134495125999e-06, "loss": 0.6434, "step": 6395 }, { "epoch": 0.41, "grad_norm": 1.5659855546634043, "learning_rate": 6.677158028894983e-06, "loss": 0.6641, "step": 6396 }, { "epoch": 0.41, "grad_norm": 1.447466058256484, "learning_rate": 6.676181490583238e-06, "loss": 0.6993, "step": 6397 }, { "epoch": 0.41, "grad_norm": 1.5456023865990822, "learning_rate": 6.675204880232735e-06, "loss": 0.6619, "step": 6398 }, { "epoch": 0.41, "grad_norm": 1.5143823813484387, "learning_rate": 6.674228197885448e-06, "loss": 0.8083, "step": 6399 }, { "epoch": 0.41, "grad_norm": 1.4478700405048652, "learning_rate": 6.6732514435833485e-06, "loss": 0.6267, "step": 6400 }, { "epoch": 0.41, "grad_norm": 1.5990872704611898, "learning_rate": 6.67227461736842e-06, "loss": 0.6962, "step": 6401 }, { "epoch": 0.41, "grad_norm": 1.7270672720903835, "learning_rate": 6.671297719282641e-06, "loss": 0.7695, "step": 6402 }, { "epoch": 0.41, "grad_norm": 1.8232887782890055, "learning_rate": 6.670320749367998e-06, "loss": 0.6414, "step": 6403 }, { "epoch": 0.41, "grad_norm": 1.6997133491324368, "learning_rate": 6.6693437076664795e-06, "loss": 0.6658, "step": 6404 }, { "epoch": 0.41, "grad_norm": 1.487453131649543, "learning_rate": 6.668366594220076e-06, "loss": 0.629, "step": 6405 }, { "epoch": 0.41, "grad_norm": 1.6450066704271007, "learning_rate": 6.667389409070782e-06, "loss": 0.6665, "step": 6406 }, { "epoch": 0.41, "grad_norm": 1.358896486045401, "learning_rate": 6.666412152260595e-06, "loss": 0.5594, "step": 6407 }, { "epoch": 0.41, "grad_norm": 1.819483699865816, "learning_rate": 6.665434823831515e-06, "loss": 0.6791, "step": 6408 }, { "epoch": 0.41, "grad_norm": 1.727025715915611, "learning_rate": 6.664457423825547e-06, "loss": 0.6559, "step": 6409 }, { "epoch": 0.41, "grad_norm": 1.420773389755776, "learning_rate": 6.663479952284695e-06, "loss": 0.6976, "step": 6410 }, { "epoch": 0.41, "grad_norm": 1.7472566429506318, "learning_rate": 6.662502409250971e-06, "loss": 0.6924, "step": 6411 }, { "epoch": 0.41, "grad_norm": 1.3202515755038724, "learning_rate": 6.661524794766387e-06, "loss": 0.6328, "step": 6412 }, { "epoch": 0.41, "grad_norm": 1.1026133734644386, "learning_rate": 6.660547108872959e-06, "loss": 0.7002, "step": 6413 }, { "epoch": 0.41, "grad_norm": 1.711730870108576, "learning_rate": 6.659569351612704e-06, "loss": 0.7637, "step": 6414 }, { "epoch": 0.41, "grad_norm": 1.3908661954907366, "learning_rate": 6.6585915230276445e-06, "loss": 0.6485, "step": 6415 }, { "epoch": 0.41, "grad_norm": 1.6243607936804956, "learning_rate": 6.657613623159808e-06, "loss": 0.633, "step": 6416 }, { "epoch": 0.41, "grad_norm": 1.134284591861518, "learning_rate": 6.656635652051222e-06, "loss": 0.7313, "step": 6417 }, { "epoch": 0.41, "grad_norm": 1.5411145983347354, "learning_rate": 6.655657609743917e-06, "loss": 0.7505, "step": 6418 }, { "epoch": 0.41, "grad_norm": 1.5338556920493522, "learning_rate": 6.654679496279925e-06, "loss": 0.7019, "step": 6419 }, { "epoch": 0.41, "grad_norm": 1.4410215115968545, "learning_rate": 6.653701311701288e-06, "loss": 0.7447, "step": 6420 }, { "epoch": 0.41, "grad_norm": 1.6102954862336083, "learning_rate": 6.6527230560500415e-06, "loss": 0.6801, "step": 6421 }, { "epoch": 0.41, "grad_norm": 1.590808332770738, "learning_rate": 6.65174472936823e-06, "loss": 0.6807, "step": 6422 }, { "epoch": 0.41, "grad_norm": 1.5918528825483902, "learning_rate": 6.6507663316979025e-06, "loss": 0.6559, "step": 6423 }, { "epoch": 0.41, "grad_norm": 1.470999970413456, "learning_rate": 6.649787863081107e-06, "loss": 0.6504, "step": 6424 }, { "epoch": 0.41, "grad_norm": 1.6221179508250485, "learning_rate": 6.648809323559895e-06, "loss": 0.6933, "step": 6425 }, { "epoch": 0.41, "grad_norm": 8.005664277742046, "learning_rate": 6.6478307131763235e-06, "loss": 0.7582, "step": 6426 }, { "epoch": 0.41, "grad_norm": 1.5975788824708064, "learning_rate": 6.64685203197245e-06, "loss": 0.6835, "step": 6427 }, { "epoch": 0.41, "grad_norm": 1.475504821027097, "learning_rate": 6.645873279990337e-06, "loss": 0.7777, "step": 6428 }, { "epoch": 0.41, "grad_norm": 1.6115064094739622, "learning_rate": 6.644894457272048e-06, "loss": 0.6083, "step": 6429 }, { "epoch": 0.41, "grad_norm": 1.6977156975184287, "learning_rate": 6.643915563859652e-06, "loss": 0.6293, "step": 6430 }, { "epoch": 0.41, "grad_norm": 1.4972272151470414, "learning_rate": 6.64293659979522e-06, "loss": 0.6526, "step": 6431 }, { "epoch": 0.41, "grad_norm": 1.9871756619442693, "learning_rate": 6.641957565120824e-06, "loss": 0.7451, "step": 6432 }, { "epoch": 0.41, "grad_norm": 1.5808163048513533, "learning_rate": 6.640978459878543e-06, "loss": 0.6997, "step": 6433 }, { "epoch": 0.41, "grad_norm": 1.032758680793078, "learning_rate": 6.639999284110457e-06, "loss": 0.6619, "step": 6434 }, { "epoch": 0.41, "grad_norm": 1.6236074960995204, "learning_rate": 6.639020037858647e-06, "loss": 0.7596, "step": 6435 }, { "epoch": 0.41, "grad_norm": 1.375830595617975, "learning_rate": 6.638040721165199e-06, "loss": 0.7396, "step": 6436 }, { "epoch": 0.41, "grad_norm": 1.5609800040467579, "learning_rate": 6.637061334072204e-06, "loss": 0.7024, "step": 6437 }, { "epoch": 0.41, "grad_norm": 1.604341904970044, "learning_rate": 6.636081876621752e-06, "loss": 0.7665, "step": 6438 }, { "epoch": 0.41, "grad_norm": 1.7114870929852435, "learning_rate": 6.635102348855939e-06, "loss": 0.6426, "step": 6439 }, { "epoch": 0.41, "grad_norm": 1.6929674109747745, "learning_rate": 6.634122750816863e-06, "loss": 0.7555, "step": 6440 }, { "epoch": 0.41, "grad_norm": 1.7310044425212714, "learning_rate": 6.6331430825466245e-06, "loss": 0.7461, "step": 6441 }, { "epoch": 0.41, "grad_norm": 1.8381421640266629, "learning_rate": 6.632163344087328e-06, "loss": 0.7612, "step": 6442 }, { "epoch": 0.41, "grad_norm": 1.1718215006174613, "learning_rate": 6.631183535481082e-06, "loss": 0.6402, "step": 6443 }, { "epoch": 0.41, "grad_norm": 1.1002616151950544, "learning_rate": 6.630203656769994e-06, "loss": 0.6533, "step": 6444 }, { "epoch": 0.41, "grad_norm": 1.6597347974886216, "learning_rate": 6.629223707996177e-06, "loss": 0.7219, "step": 6445 }, { "epoch": 0.41, "grad_norm": 1.5469236765395358, "learning_rate": 6.628243689201752e-06, "loss": 0.6474, "step": 6446 }, { "epoch": 0.41, "grad_norm": 1.762412439779821, "learning_rate": 6.627263600428833e-06, "loss": 0.846, "step": 6447 }, { "epoch": 0.41, "grad_norm": 1.4684778735101904, "learning_rate": 6.6262834417195434e-06, "loss": 0.677, "step": 6448 }, { "epoch": 0.41, "grad_norm": 1.4812378987937074, "learning_rate": 6.625303213116009e-06, "loss": 0.6677, "step": 6449 }, { "epoch": 0.41, "grad_norm": 1.82541319299963, "learning_rate": 6.624322914660357e-06, "loss": 0.7196, "step": 6450 }, { "epoch": 0.41, "grad_norm": 1.4868394811808485, "learning_rate": 6.62334254639472e-06, "loss": 0.6531, "step": 6451 }, { "epoch": 0.41, "grad_norm": 1.5470093137547982, "learning_rate": 6.622362108361231e-06, "loss": 0.6894, "step": 6452 }, { "epoch": 0.41, "grad_norm": 1.407095553695618, "learning_rate": 6.621381600602028e-06, "loss": 0.694, "step": 6453 }, { "epoch": 0.41, "grad_norm": 1.6629000341918925, "learning_rate": 6.620401023159251e-06, "loss": 0.6846, "step": 6454 }, { "epoch": 0.41, "grad_norm": 1.4954016995828512, "learning_rate": 6.619420376075043e-06, "loss": 0.7991, "step": 6455 }, { "epoch": 0.41, "grad_norm": 1.5405592029377095, "learning_rate": 6.618439659391551e-06, "loss": 0.7148, "step": 6456 }, { "epoch": 0.41, "grad_norm": 1.5620609785778925, "learning_rate": 6.617458873150922e-06, "loss": 0.7926, "step": 6457 }, { "epoch": 0.41, "grad_norm": 1.4268709131109047, "learning_rate": 6.616478017395309e-06, "loss": 0.636, "step": 6458 }, { "epoch": 0.41, "grad_norm": 1.826449623239284, "learning_rate": 6.6154970921668685e-06, "loss": 0.7712, "step": 6459 }, { "epoch": 0.41, "grad_norm": 1.58679208156439, "learning_rate": 6.614516097507756e-06, "loss": 0.6562, "step": 6460 }, { "epoch": 0.41, "grad_norm": 1.1906889747921658, "learning_rate": 6.613535033460135e-06, "loss": 0.7981, "step": 6461 }, { "epoch": 0.41, "grad_norm": 1.4198609408243068, "learning_rate": 6.6125539000661694e-06, "loss": 0.6687, "step": 6462 }, { "epoch": 0.41, "grad_norm": 1.6173239083302104, "learning_rate": 6.611572697368026e-06, "loss": 0.7572, "step": 6463 }, { "epoch": 0.41, "grad_norm": 1.1837572310135964, "learning_rate": 6.610591425407875e-06, "loss": 0.7262, "step": 6464 }, { "epoch": 0.41, "grad_norm": 1.4561996394610266, "learning_rate": 6.6096100842278865e-06, "loss": 0.6829, "step": 6465 }, { "epoch": 0.41, "grad_norm": 1.5873448783178203, "learning_rate": 6.60862867387024e-06, "loss": 0.6338, "step": 6466 }, { "epoch": 0.41, "grad_norm": 1.0202796703526447, "learning_rate": 6.607647194377113e-06, "loss": 0.6284, "step": 6467 }, { "epoch": 0.41, "grad_norm": 1.4220772239378627, "learning_rate": 6.606665645790689e-06, "loss": 0.6796, "step": 6468 }, { "epoch": 0.41, "grad_norm": 1.5030285870009215, "learning_rate": 6.60568402815315e-06, "loss": 0.6179, "step": 6469 }, { "epoch": 0.41, "grad_norm": 1.435800201615243, "learning_rate": 6.604702341506686e-06, "loss": 0.718, "step": 6470 }, { "epoch": 0.41, "grad_norm": 1.567105506402482, "learning_rate": 6.603720585893487e-06, "loss": 0.7667, "step": 6471 }, { "epoch": 0.41, "grad_norm": 1.4595177009487363, "learning_rate": 6.602738761355747e-06, "loss": 0.739, "step": 6472 }, { "epoch": 0.41, "grad_norm": 1.4922077986502513, "learning_rate": 6.601756867935664e-06, "loss": 0.6111, "step": 6473 }, { "epoch": 0.41, "grad_norm": 1.537125991937981, "learning_rate": 6.600774905675436e-06, "loss": 0.7132, "step": 6474 }, { "epoch": 0.41, "grad_norm": 1.4398671782985584, "learning_rate": 6.599792874617267e-06, "loss": 0.7201, "step": 6475 }, { "epoch": 0.41, "grad_norm": 1.4996006988081623, "learning_rate": 6.598810774803361e-06, "loss": 0.7038, "step": 6476 }, { "epoch": 0.41, "grad_norm": 1.4795550117009488, "learning_rate": 6.597828606275928e-06, "loss": 0.7081, "step": 6477 }, { "epoch": 0.41, "grad_norm": 1.6184940422675977, "learning_rate": 6.5968463690771775e-06, "loss": 0.6888, "step": 6478 }, { "epoch": 0.41, "grad_norm": 1.686745685485642, "learning_rate": 6.595864063249326e-06, "loss": 0.752, "step": 6479 }, { "epoch": 0.41, "grad_norm": 1.8805707596352874, "learning_rate": 6.5948816888345915e-06, "loss": 0.7666, "step": 6480 }, { "epoch": 0.41, "grad_norm": 1.174713353131087, "learning_rate": 6.593899245875193e-06, "loss": 0.6895, "step": 6481 }, { "epoch": 0.41, "grad_norm": 1.5997114400757897, "learning_rate": 6.592916734413354e-06, "loss": 0.7644, "step": 6482 }, { "epoch": 0.41, "grad_norm": 1.5678375304345755, "learning_rate": 6.591934154491301e-06, "loss": 0.8051, "step": 6483 }, { "epoch": 0.42, "grad_norm": 1.6540020695978808, "learning_rate": 6.590951506151263e-06, "loss": 0.6609, "step": 6484 }, { "epoch": 0.42, "grad_norm": 1.4040399779642592, "learning_rate": 6.589968789435472e-06, "loss": 0.6661, "step": 6485 }, { "epoch": 0.42, "grad_norm": 1.3512080426910411, "learning_rate": 6.5889860043861644e-06, "loss": 0.6422, "step": 6486 }, { "epoch": 0.42, "grad_norm": 1.485637524697984, "learning_rate": 6.588003151045577e-06, "loss": 0.6695, "step": 6487 }, { "epoch": 0.42, "grad_norm": 1.6855476906123608, "learning_rate": 6.58702022945595e-06, "loss": 0.7754, "step": 6488 }, { "epoch": 0.42, "grad_norm": 1.6267145811147181, "learning_rate": 6.586037239659529e-06, "loss": 0.6573, "step": 6489 }, { "epoch": 0.42, "grad_norm": 1.342039235900548, "learning_rate": 6.5850541816985595e-06, "loss": 0.6704, "step": 6490 }, { "epoch": 0.42, "grad_norm": 1.668274117841916, "learning_rate": 6.584071055615293e-06, "loss": 0.7397, "step": 6491 }, { "epoch": 0.42, "grad_norm": 1.539806249287055, "learning_rate": 6.5830878614519815e-06, "loss": 0.6301, "step": 6492 }, { "epoch": 0.42, "grad_norm": 1.6116280667780307, "learning_rate": 6.582104599250878e-06, "loss": 0.7343, "step": 6493 }, { "epoch": 0.42, "grad_norm": 1.6031933268153242, "learning_rate": 6.581121269054244e-06, "loss": 0.6916, "step": 6494 }, { "epoch": 0.42, "grad_norm": 1.439966582899403, "learning_rate": 6.580137870904342e-06, "loss": 0.6025, "step": 6495 }, { "epoch": 0.42, "grad_norm": 1.4615853728826933, "learning_rate": 6.5791544048434346e-06, "loss": 0.6437, "step": 6496 }, { "epoch": 0.42, "grad_norm": 1.6244169937411652, "learning_rate": 6.578170870913787e-06, "loss": 0.7249, "step": 6497 }, { "epoch": 0.42, "grad_norm": 1.5944287379363187, "learning_rate": 6.577187269157672e-06, "loss": 0.7001, "step": 6498 }, { "epoch": 0.42, "grad_norm": 1.5598827914444622, "learning_rate": 6.576203599617363e-06, "loss": 0.7963, "step": 6499 }, { "epoch": 0.42, "grad_norm": 1.5290137716504975, "learning_rate": 6.575219862335136e-06, "loss": 0.6605, "step": 6500 }, { "epoch": 0.42, "grad_norm": 1.5276411636202496, "learning_rate": 6.574236057353268e-06, "loss": 0.7593, "step": 6501 }, { "epoch": 0.42, "grad_norm": 1.8158382990576571, "learning_rate": 6.573252184714043e-06, "loss": 0.7924, "step": 6502 }, { "epoch": 0.42, "grad_norm": 1.3375304724507378, "learning_rate": 6.572268244459745e-06, "loss": 0.671, "step": 6503 }, { "epoch": 0.42, "grad_norm": 1.84414926353401, "learning_rate": 6.5712842366326625e-06, "loss": 0.7148, "step": 6504 }, { "epoch": 0.42, "grad_norm": 1.5008700451668702, "learning_rate": 6.570300161275084e-06, "loss": 0.7151, "step": 6505 }, { "epoch": 0.42, "grad_norm": 1.7653764845204885, "learning_rate": 6.569316018429304e-06, "loss": 0.6859, "step": 6506 }, { "epoch": 0.42, "grad_norm": 1.6078074688579598, "learning_rate": 6.568331808137619e-06, "loss": 0.6791, "step": 6507 }, { "epoch": 0.42, "grad_norm": 1.4895998324234734, "learning_rate": 6.567347530442328e-06, "loss": 0.7872, "step": 6508 }, { "epoch": 0.42, "grad_norm": 1.7104457217351694, "learning_rate": 6.5663631853857355e-06, "loss": 0.6181, "step": 6509 }, { "epoch": 0.42, "grad_norm": 1.7495638582409916, "learning_rate": 6.565378773010144e-06, "loss": 0.7986, "step": 6510 }, { "epoch": 0.42, "grad_norm": 1.8596805675311694, "learning_rate": 6.564394293357861e-06, "loss": 0.7272, "step": 6511 }, { "epoch": 0.42, "grad_norm": 1.001406916058811, "learning_rate": 6.5634097464711995e-06, "loss": 0.5986, "step": 6512 }, { "epoch": 0.42, "grad_norm": 1.5217926881290482, "learning_rate": 6.562425132392473e-06, "loss": 0.6428, "step": 6513 }, { "epoch": 0.42, "grad_norm": 1.50919533313603, "learning_rate": 6.5614404511639964e-06, "loss": 0.6243, "step": 6514 }, { "epoch": 0.42, "grad_norm": 1.4483921254772145, "learning_rate": 6.560455702828089e-06, "loss": 0.6652, "step": 6515 }, { "epoch": 0.42, "grad_norm": 2.825545091413147, "learning_rate": 6.559470887427076e-06, "loss": 0.7682, "step": 6516 }, { "epoch": 0.42, "grad_norm": 1.446060963447501, "learning_rate": 6.558486005003281e-06, "loss": 0.6829, "step": 6517 }, { "epoch": 0.42, "grad_norm": 1.3192183701504543, "learning_rate": 6.557501055599032e-06, "loss": 0.759, "step": 6518 }, { "epoch": 0.42, "grad_norm": 1.543367150209722, "learning_rate": 6.556516039256662e-06, "loss": 0.6375, "step": 6519 }, { "epoch": 0.42, "grad_norm": 1.1358092086576723, "learning_rate": 6.555530956018502e-06, "loss": 0.6711, "step": 6520 }, { "epoch": 0.42, "grad_norm": 1.4391890236227638, "learning_rate": 6.554545805926891e-06, "loss": 0.7025, "step": 6521 }, { "epoch": 0.42, "grad_norm": 1.5531040617482614, "learning_rate": 6.553560589024166e-06, "loss": 0.6301, "step": 6522 }, { "epoch": 0.42, "grad_norm": 1.0295826604161775, "learning_rate": 6.552575305352672e-06, "loss": 0.6798, "step": 6523 }, { "epoch": 0.42, "grad_norm": 1.43157142556052, "learning_rate": 6.551589954954754e-06, "loss": 0.6959, "step": 6524 }, { "epoch": 0.42, "grad_norm": 2.164767136508896, "learning_rate": 6.55060453787276e-06, "loss": 0.6784, "step": 6525 }, { "epoch": 0.42, "grad_norm": 1.7918451373519473, "learning_rate": 6.549619054149041e-06, "loss": 0.6659, "step": 6526 }, { "epoch": 0.42, "grad_norm": 1.452176173271425, "learning_rate": 6.548633503825953e-06, "loss": 0.6852, "step": 6527 }, { "epoch": 0.42, "grad_norm": 1.6655465593245553, "learning_rate": 6.5476478869458496e-06, "loss": 0.792, "step": 6528 }, { "epoch": 0.42, "grad_norm": 1.197147035964685, "learning_rate": 6.546662203551092e-06, "loss": 0.596, "step": 6529 }, { "epoch": 0.42, "grad_norm": 1.2679025256459466, "learning_rate": 6.545676453684043e-06, "loss": 0.7688, "step": 6530 }, { "epoch": 0.42, "grad_norm": 1.5936619325165133, "learning_rate": 6.544690637387068e-06, "loss": 0.7251, "step": 6531 }, { "epoch": 0.42, "grad_norm": 1.6267670224934279, "learning_rate": 6.543704754702536e-06, "loss": 0.6457, "step": 6532 }, { "epoch": 0.42, "grad_norm": 1.4336184980186846, "learning_rate": 6.5427188056728165e-06, "loss": 0.7069, "step": 6533 }, { "epoch": 0.42, "grad_norm": 1.6164630220328455, "learning_rate": 6.541732790340285e-06, "loss": 0.6921, "step": 6534 }, { "epoch": 0.42, "grad_norm": 1.9158489071674598, "learning_rate": 6.540746708747317e-06, "loss": 0.645, "step": 6535 }, { "epoch": 0.42, "grad_norm": 1.4475446806770549, "learning_rate": 6.539760560936292e-06, "loss": 0.7206, "step": 6536 }, { "epoch": 0.42, "grad_norm": 1.4065682311298797, "learning_rate": 6.538774346949597e-06, "loss": 0.7386, "step": 6537 }, { "epoch": 0.42, "grad_norm": 1.3322115261263203, "learning_rate": 6.537788066829611e-06, "loss": 0.7071, "step": 6538 }, { "epoch": 0.42, "grad_norm": 0.9218457438483179, "learning_rate": 6.536801720618727e-06, "loss": 0.6654, "step": 6539 }, { "epoch": 0.42, "grad_norm": 1.5740873490660805, "learning_rate": 6.5358153083593345e-06, "loss": 0.7565, "step": 6540 }, { "epoch": 0.42, "grad_norm": 1.5421361976981904, "learning_rate": 6.534828830093827e-06, "loss": 0.7397, "step": 6541 }, { "epoch": 0.42, "grad_norm": 1.253666633751493, "learning_rate": 6.5338422858646e-06, "loss": 0.6174, "step": 6542 }, { "epoch": 0.42, "grad_norm": 1.4612833481446805, "learning_rate": 6.532855675714055e-06, "loss": 0.7653, "step": 6543 }, { "epoch": 0.42, "grad_norm": 1.2395432334295446, "learning_rate": 6.531868999684594e-06, "loss": 0.7095, "step": 6544 }, { "epoch": 0.42, "grad_norm": 2.253234171053288, "learning_rate": 6.5308822578186225e-06, "loss": 0.7316, "step": 6545 }, { "epoch": 0.42, "grad_norm": 1.6117173247734913, "learning_rate": 6.529895450158549e-06, "loss": 0.6603, "step": 6546 }, { "epoch": 0.42, "grad_norm": 1.5962792408709998, "learning_rate": 6.528908576746784e-06, "loss": 0.7782, "step": 6547 }, { "epoch": 0.42, "grad_norm": 1.43564898428641, "learning_rate": 6.527921637625741e-06, "loss": 0.6406, "step": 6548 }, { "epoch": 0.42, "grad_norm": 1.525870076622832, "learning_rate": 6.526934632837835e-06, "loss": 0.7221, "step": 6549 }, { "epoch": 0.42, "grad_norm": 2.502735401121555, "learning_rate": 6.5259475624254875e-06, "loss": 0.6562, "step": 6550 }, { "epoch": 0.42, "grad_norm": 1.6390893066220642, "learning_rate": 6.5249604264311216e-06, "loss": 0.7479, "step": 6551 }, { "epoch": 0.42, "grad_norm": 1.4882559142570668, "learning_rate": 6.5239732248971595e-06, "loss": 0.7005, "step": 6552 }, { "epoch": 0.42, "grad_norm": 1.4996240843703332, "learning_rate": 6.522985957866032e-06, "loss": 0.7034, "step": 6553 }, { "epoch": 0.42, "grad_norm": 1.4966119755243281, "learning_rate": 6.521998625380167e-06, "loss": 0.6755, "step": 6554 }, { "epoch": 0.42, "grad_norm": 1.3833556983908668, "learning_rate": 6.521011227482e-06, "loss": 0.6999, "step": 6555 }, { "epoch": 0.42, "grad_norm": 1.4150684671076568, "learning_rate": 6.520023764213968e-06, "loss": 0.7419, "step": 6556 }, { "epoch": 0.42, "grad_norm": 1.4808088700881228, "learning_rate": 6.519036235618505e-06, "loss": 0.745, "step": 6557 }, { "epoch": 0.42, "grad_norm": 1.5692792176433965, "learning_rate": 6.518048641738059e-06, "loss": 0.7113, "step": 6558 }, { "epoch": 0.42, "grad_norm": 1.4470295822605272, "learning_rate": 6.517060982615071e-06, "loss": 0.6529, "step": 6559 }, { "epoch": 0.42, "grad_norm": 1.515909308897942, "learning_rate": 6.51607325829199e-06, "loss": 0.7634, "step": 6560 }, { "epoch": 0.42, "grad_norm": 1.6751821924778625, "learning_rate": 6.5150854688112686e-06, "loss": 0.6989, "step": 6561 }, { "epoch": 0.42, "grad_norm": 1.6504362368448278, "learning_rate": 6.514097614215353e-06, "loss": 0.7154, "step": 6562 }, { "epoch": 0.42, "grad_norm": 1.4934217391622435, "learning_rate": 6.513109694546707e-06, "loss": 0.7006, "step": 6563 }, { "epoch": 0.42, "grad_norm": 1.4248567782457782, "learning_rate": 6.512121709847785e-06, "loss": 0.6523, "step": 6564 }, { "epoch": 0.42, "grad_norm": 1.268298953284019, "learning_rate": 6.511133660161047e-06, "loss": 0.6955, "step": 6565 }, { "epoch": 0.42, "grad_norm": 2.0107763899754256, "learning_rate": 6.510145545528963e-06, "loss": 0.7503, "step": 6566 }, { "epoch": 0.42, "grad_norm": 2.055884899524297, "learning_rate": 6.5091573659939945e-06, "loss": 0.7411, "step": 6567 }, { "epoch": 0.42, "grad_norm": 1.612348514302224, "learning_rate": 6.508169121598615e-06, "loss": 0.6486, "step": 6568 }, { "epoch": 0.42, "grad_norm": 1.9248764158365097, "learning_rate": 6.507180812385295e-06, "loss": 0.6533, "step": 6569 }, { "epoch": 0.42, "grad_norm": 1.3494706558994765, "learning_rate": 6.506192438396512e-06, "loss": 0.7176, "step": 6570 }, { "epoch": 0.42, "grad_norm": 1.740571104148459, "learning_rate": 6.50520399967474e-06, "loss": 0.5027, "step": 6571 }, { "epoch": 0.42, "grad_norm": 1.4661801853439, "learning_rate": 6.504215496262464e-06, "loss": 0.6919, "step": 6572 }, { "epoch": 0.42, "grad_norm": 2.0207730142280393, "learning_rate": 6.503226928202167e-06, "loss": 0.6605, "step": 6573 }, { "epoch": 0.42, "grad_norm": 1.576153969485024, "learning_rate": 6.502238295536336e-06, "loss": 0.6618, "step": 6574 }, { "epoch": 0.42, "grad_norm": 1.7663410187201074, "learning_rate": 6.5012495983074605e-06, "loss": 0.6294, "step": 6575 }, { "epoch": 0.42, "grad_norm": 1.6630295489686076, "learning_rate": 6.500260836558031e-06, "loss": 0.7212, "step": 6576 }, { "epoch": 0.42, "grad_norm": 1.4830564433848685, "learning_rate": 6.499272010330543e-06, "loss": 0.6873, "step": 6577 }, { "epoch": 0.42, "grad_norm": 1.7321082983146314, "learning_rate": 6.4982831196674945e-06, "loss": 0.6565, "step": 6578 }, { "epoch": 0.42, "grad_norm": 1.7659629623616246, "learning_rate": 6.497294164611385e-06, "loss": 0.7438, "step": 6579 }, { "epoch": 0.42, "grad_norm": 1.7622630847226695, "learning_rate": 6.496305145204719e-06, "loss": 0.7846, "step": 6580 }, { "epoch": 0.42, "grad_norm": 1.5696118278169393, "learning_rate": 6.495316061490003e-06, "loss": 0.6716, "step": 6581 }, { "epoch": 0.42, "grad_norm": 2.0669121875094087, "learning_rate": 6.494326913509744e-06, "loss": 0.8725, "step": 6582 }, { "epoch": 0.42, "grad_norm": 1.4864431480327271, "learning_rate": 6.493337701306454e-06, "loss": 0.5812, "step": 6583 }, { "epoch": 0.42, "grad_norm": 1.7313697305923041, "learning_rate": 6.492348424922648e-06, "loss": 0.7298, "step": 6584 }, { "epoch": 0.42, "grad_norm": 1.6281859590272934, "learning_rate": 6.4913590844008436e-06, "loss": 0.7104, "step": 6585 }, { "epoch": 0.42, "grad_norm": 1.3858515922359307, "learning_rate": 6.490369679783557e-06, "loss": 0.6745, "step": 6586 }, { "epoch": 0.42, "grad_norm": 1.4781274429152214, "learning_rate": 6.489380211113316e-06, "loss": 0.7186, "step": 6587 }, { "epoch": 0.42, "grad_norm": 1.4330004194146486, "learning_rate": 6.488390678432641e-06, "loss": 0.6583, "step": 6588 }, { "epoch": 0.42, "grad_norm": 1.4295826609882176, "learning_rate": 6.487401081784063e-06, "loss": 0.6273, "step": 6589 }, { "epoch": 0.42, "grad_norm": 1.7408606676015261, "learning_rate": 6.486411421210112e-06, "loss": 0.7383, "step": 6590 }, { "epoch": 0.42, "grad_norm": 1.4585716655420988, "learning_rate": 6.485421696753321e-06, "loss": 0.6689, "step": 6591 }, { "epoch": 0.42, "grad_norm": 1.2223486924342697, "learning_rate": 6.484431908456228e-06, "loss": 0.6393, "step": 6592 }, { "epoch": 0.42, "grad_norm": 1.3940888755231944, "learning_rate": 6.4834420563613685e-06, "loss": 0.5636, "step": 6593 }, { "epoch": 0.42, "grad_norm": 1.445547934472794, "learning_rate": 6.482452140511288e-06, "loss": 0.655, "step": 6594 }, { "epoch": 0.42, "grad_norm": 1.1349659322401455, "learning_rate": 6.481462160948531e-06, "loss": 0.5898, "step": 6595 }, { "epoch": 0.42, "grad_norm": 1.497822015417031, "learning_rate": 6.480472117715642e-06, "loss": 0.7054, "step": 6596 }, { "epoch": 0.42, "grad_norm": 1.5933313349766125, "learning_rate": 6.479482010855172e-06, "loss": 0.7892, "step": 6597 }, { "epoch": 0.42, "grad_norm": 2.3096885457959413, "learning_rate": 6.478491840409675e-06, "loss": 0.6907, "step": 6598 }, { "epoch": 0.42, "grad_norm": 1.5941334234489397, "learning_rate": 6.477501606421703e-06, "loss": 0.7261, "step": 6599 }, { "epoch": 0.42, "grad_norm": 1.430598200057668, "learning_rate": 6.47651130893382e-06, "loss": 0.6085, "step": 6600 }, { "epoch": 0.42, "grad_norm": 1.589866470049346, "learning_rate": 6.4755209479885806e-06, "loss": 0.6555, "step": 6601 }, { "epoch": 0.42, "grad_norm": 1.634499823341136, "learning_rate": 6.474530523628553e-06, "loss": 0.701, "step": 6602 }, { "epoch": 0.42, "grad_norm": 1.661473857226236, "learning_rate": 6.473540035896301e-06, "loss": 0.7655, "step": 6603 }, { "epoch": 0.42, "grad_norm": 1.6894117201034682, "learning_rate": 6.472549484834395e-06, "loss": 0.7008, "step": 6604 }, { "epoch": 0.42, "grad_norm": 1.4973147158938036, "learning_rate": 6.471558870485407e-06, "loss": 0.6865, "step": 6605 }, { "epoch": 0.42, "grad_norm": 1.4039656751364853, "learning_rate": 6.470568192891911e-06, "loss": 0.6788, "step": 6606 }, { "epoch": 0.42, "grad_norm": 1.7491481270938514, "learning_rate": 6.469577452096483e-06, "loss": 0.6505, "step": 6607 }, { "epoch": 0.42, "grad_norm": 4.527531906532081, "learning_rate": 6.468586648141704e-06, "loss": 0.714, "step": 6608 }, { "epoch": 0.42, "grad_norm": 1.1482557549178414, "learning_rate": 6.467595781070158e-06, "loss": 0.7237, "step": 6609 }, { "epoch": 0.42, "grad_norm": 1.5892277939329686, "learning_rate": 6.466604850924427e-06, "loss": 0.6868, "step": 6610 }, { "epoch": 0.42, "grad_norm": 1.4936388996500094, "learning_rate": 6.4656138577471036e-06, "loss": 0.728, "step": 6611 }, { "epoch": 0.42, "grad_norm": 1.6880199490199679, "learning_rate": 6.464622801580776e-06, "loss": 0.6957, "step": 6612 }, { "epoch": 0.42, "grad_norm": 3.085458011379202, "learning_rate": 6.463631682468038e-06, "loss": 0.8653, "step": 6613 }, { "epoch": 0.42, "grad_norm": 1.569668755558597, "learning_rate": 6.462640500451484e-06, "loss": 0.7275, "step": 6614 }, { "epoch": 0.42, "grad_norm": 2.5536318114338226, "learning_rate": 6.461649255573716e-06, "loss": 0.7578, "step": 6615 }, { "epoch": 0.42, "grad_norm": 1.4686300607945553, "learning_rate": 6.460657947877335e-06, "loss": 0.6088, "step": 6616 }, { "epoch": 0.42, "grad_norm": 0.9474289577714805, "learning_rate": 6.459666577404944e-06, "loss": 0.5979, "step": 6617 }, { "epoch": 0.42, "grad_norm": 1.5243387799237458, "learning_rate": 6.45867514419915e-06, "loss": 0.8227, "step": 6618 }, { "epoch": 0.42, "grad_norm": 1.6055070920948051, "learning_rate": 6.457683648302565e-06, "loss": 0.7127, "step": 6619 }, { "epoch": 0.42, "grad_norm": 1.284251840350561, "learning_rate": 6.456692089757799e-06, "loss": 0.6019, "step": 6620 }, { "epoch": 0.42, "grad_norm": 1.5591395942583646, "learning_rate": 6.455700468607469e-06, "loss": 0.6702, "step": 6621 }, { "epoch": 0.42, "grad_norm": 1.633585560921289, "learning_rate": 6.454708784894189e-06, "loss": 0.7019, "step": 6622 }, { "epoch": 0.42, "grad_norm": 2.0059277329702314, "learning_rate": 6.453717038660584e-06, "loss": 0.7662, "step": 6623 }, { "epoch": 0.42, "grad_norm": 1.4087184018269852, "learning_rate": 6.452725229949275e-06, "loss": 0.6707, "step": 6624 }, { "epoch": 0.42, "grad_norm": 1.7362677079405386, "learning_rate": 6.451733358802889e-06, "loss": 0.7268, "step": 6625 }, { "epoch": 0.42, "grad_norm": 1.480798814429332, "learning_rate": 6.450741425264052e-06, "loss": 0.6126, "step": 6626 }, { "epoch": 0.42, "grad_norm": 1.6038333835054117, "learning_rate": 6.449749429375398e-06, "loss": 0.6959, "step": 6627 }, { "epoch": 0.42, "grad_norm": 1.7206420952222012, "learning_rate": 6.4487573711795604e-06, "loss": 0.6911, "step": 6628 }, { "epoch": 0.42, "grad_norm": 1.6350945431568475, "learning_rate": 6.4477652507191744e-06, "loss": 0.6155, "step": 6629 }, { "epoch": 0.42, "grad_norm": 1.3941260734898357, "learning_rate": 6.44677306803688e-06, "loss": 0.6872, "step": 6630 }, { "epoch": 0.42, "grad_norm": 1.666899348747208, "learning_rate": 6.44578082317532e-06, "loss": 0.6828, "step": 6631 }, { "epoch": 0.42, "grad_norm": 1.627061912475034, "learning_rate": 6.444788516177138e-06, "loss": 0.6924, "step": 6632 }, { "epoch": 0.42, "grad_norm": 1.3756633128060922, "learning_rate": 6.443796147084982e-06, "loss": 0.7671, "step": 6633 }, { "epoch": 0.42, "grad_norm": 1.4820217563232532, "learning_rate": 6.442803715941501e-06, "loss": 0.6975, "step": 6634 }, { "epoch": 0.42, "grad_norm": 1.5841556985360725, "learning_rate": 6.441811222789347e-06, "loss": 0.6763, "step": 6635 }, { "epoch": 0.42, "grad_norm": 1.5596020880350328, "learning_rate": 6.440818667671178e-06, "loss": 0.6593, "step": 6636 }, { "epoch": 0.42, "grad_norm": 1.5856385713444643, "learning_rate": 6.439826050629649e-06, "loss": 0.6725, "step": 6637 }, { "epoch": 0.42, "grad_norm": 1.6273009072271318, "learning_rate": 6.4388333717074226e-06, "loss": 0.6451, "step": 6638 }, { "epoch": 0.42, "grad_norm": 0.9999770143464197, "learning_rate": 6.4378406309471605e-06, "loss": 0.5808, "step": 6639 }, { "epoch": 0.43, "grad_norm": 1.9964625803748877, "learning_rate": 6.43684782839153e-06, "loss": 0.7149, "step": 6640 }, { "epoch": 0.43, "grad_norm": 1.556259616205762, "learning_rate": 6.435854964083199e-06, "loss": 0.6769, "step": 6641 }, { "epoch": 0.43, "grad_norm": 1.5923020465959583, "learning_rate": 6.434862038064839e-06, "loss": 0.7267, "step": 6642 }, { "epoch": 0.43, "grad_norm": 1.6216228366292937, "learning_rate": 6.433869050379122e-06, "loss": 0.6705, "step": 6643 }, { "epoch": 0.43, "grad_norm": 1.5552707380219981, "learning_rate": 6.432876001068729e-06, "loss": 0.6916, "step": 6644 }, { "epoch": 0.43, "grad_norm": 1.6641469189346891, "learning_rate": 6.431882890176334e-06, "loss": 0.7801, "step": 6645 }, { "epoch": 0.43, "grad_norm": 1.4024646630703368, "learning_rate": 6.430889717744622e-06, "loss": 0.6896, "step": 6646 }, { "epoch": 0.43, "grad_norm": 1.19270614849827, "learning_rate": 6.429896483816277e-06, "loss": 0.6443, "step": 6647 }, { "epoch": 0.43, "grad_norm": 1.647183914436174, "learning_rate": 6.4289031884339855e-06, "loss": 0.7832, "step": 6648 }, { "epoch": 0.43, "grad_norm": 1.8232273723968075, "learning_rate": 6.427909831640438e-06, "loss": 0.7312, "step": 6649 }, { "epoch": 0.43, "grad_norm": 1.1009589931363433, "learning_rate": 6.426916413478326e-06, "loss": 0.6228, "step": 6650 }, { "epoch": 0.43, "grad_norm": 1.6347355927940161, "learning_rate": 6.425922933990344e-06, "loss": 0.7384, "step": 6651 }, { "epoch": 0.43, "grad_norm": 1.5402686316911045, "learning_rate": 6.424929393219192e-06, "loss": 0.7113, "step": 6652 }, { "epoch": 0.43, "grad_norm": 1.4705040952868118, "learning_rate": 6.423935791207568e-06, "loss": 0.7768, "step": 6653 }, { "epoch": 0.43, "grad_norm": 1.6530802045016222, "learning_rate": 6.422942127998175e-06, "loss": 0.6253, "step": 6654 }, { "epoch": 0.43, "grad_norm": 1.9830056512261585, "learning_rate": 6.421948403633721e-06, "loss": 0.7025, "step": 6655 }, { "epoch": 0.43, "grad_norm": 1.4824405747870717, "learning_rate": 6.420954618156912e-06, "loss": 0.7156, "step": 6656 }, { "epoch": 0.43, "grad_norm": 1.4583715522906568, "learning_rate": 6.4199607716104605e-06, "loss": 0.6809, "step": 6657 }, { "epoch": 0.43, "grad_norm": 1.5717216161371708, "learning_rate": 6.418966864037076e-06, "loss": 0.7479, "step": 6658 }, { "epoch": 0.43, "grad_norm": 1.061299531583643, "learning_rate": 6.41797289547948e-06, "loss": 0.7434, "step": 6659 }, { "epoch": 0.43, "grad_norm": 1.521713679472904, "learning_rate": 6.416978865980388e-06, "loss": 0.6589, "step": 6660 }, { "epoch": 0.43, "grad_norm": 1.8611373084491405, "learning_rate": 6.415984775582521e-06, "loss": 0.6157, "step": 6661 }, { "epoch": 0.43, "grad_norm": 1.5041280109829624, "learning_rate": 6.414990624328604e-06, "loss": 0.6888, "step": 6662 }, { "epoch": 0.43, "grad_norm": 1.583077643272022, "learning_rate": 6.413996412261363e-06, "loss": 0.7564, "step": 6663 }, { "epoch": 0.43, "grad_norm": 1.7247322894231893, "learning_rate": 6.413002139423527e-06, "loss": 0.6897, "step": 6664 }, { "epoch": 0.43, "grad_norm": 1.7897112088865992, "learning_rate": 6.412007805857828e-06, "loss": 0.637, "step": 6665 }, { "epoch": 0.43, "grad_norm": 1.4414596897364014, "learning_rate": 6.411013411607002e-06, "loss": 0.6702, "step": 6666 }, { "epoch": 0.43, "grad_norm": 1.1745225778785053, "learning_rate": 6.410018956713784e-06, "loss": 0.6965, "step": 6667 }, { "epoch": 0.43, "grad_norm": 1.1434641344570962, "learning_rate": 6.409024441220915e-06, "loss": 0.6913, "step": 6668 }, { "epoch": 0.43, "grad_norm": 1.2722558605134604, "learning_rate": 6.408029865171135e-06, "loss": 0.6412, "step": 6669 }, { "epoch": 0.43, "grad_norm": 1.3863651962822248, "learning_rate": 6.40703522860719e-06, "loss": 0.6464, "step": 6670 }, { "epoch": 0.43, "grad_norm": 1.604233243043214, "learning_rate": 6.406040531571828e-06, "loss": 0.7692, "step": 6671 }, { "epoch": 0.43, "grad_norm": 1.0510288426111, "learning_rate": 6.405045774107798e-06, "loss": 0.6155, "step": 6672 }, { "epoch": 0.43, "grad_norm": 1.3944821551008533, "learning_rate": 6.404050956257853e-06, "loss": 0.6277, "step": 6673 }, { "epoch": 0.43, "grad_norm": 1.554858654503512, "learning_rate": 6.403056078064749e-06, "loss": 0.7495, "step": 6674 }, { "epoch": 0.43, "grad_norm": 1.5533788059753477, "learning_rate": 6.402061139571243e-06, "loss": 0.7407, "step": 6675 }, { "epoch": 0.43, "grad_norm": 1.3760088605864793, "learning_rate": 6.401066140820095e-06, "loss": 0.6014, "step": 6676 }, { "epoch": 0.43, "grad_norm": 1.5019996114110525, "learning_rate": 6.400071081854068e-06, "loss": 0.7212, "step": 6677 }, { "epoch": 0.43, "grad_norm": 1.0760802384436152, "learning_rate": 6.3990759627159285e-06, "loss": 0.6449, "step": 6678 }, { "epoch": 0.43, "grad_norm": 1.4919294356289188, "learning_rate": 6.398080783448443e-06, "loss": 0.6598, "step": 6679 }, { "epoch": 0.43, "grad_norm": 1.820801119300255, "learning_rate": 6.397085544094383e-06, "loss": 0.6496, "step": 6680 }, { "epoch": 0.43, "grad_norm": 1.4339798710885945, "learning_rate": 6.396090244696523e-06, "loss": 0.6748, "step": 6681 }, { "epoch": 0.43, "grad_norm": 2.1527632468196094, "learning_rate": 6.395094885297637e-06, "loss": 0.606, "step": 6682 }, { "epoch": 0.43, "grad_norm": 1.6650873291742927, "learning_rate": 6.394099465940505e-06, "loss": 0.748, "step": 6683 }, { "epoch": 0.43, "grad_norm": 1.6371247007158296, "learning_rate": 6.393103986667908e-06, "loss": 0.7275, "step": 6684 }, { "epoch": 0.43, "grad_norm": 1.493127919856055, "learning_rate": 6.3921084475226295e-06, "loss": 0.6551, "step": 6685 }, { "epoch": 0.43, "grad_norm": 1.6093278444356502, "learning_rate": 6.3911128485474515e-06, "loss": 0.6972, "step": 6686 }, { "epoch": 0.43, "grad_norm": 1.640906704219133, "learning_rate": 6.39011718978517e-06, "loss": 0.6589, "step": 6687 }, { "epoch": 0.43, "grad_norm": 1.67006390801591, "learning_rate": 6.389121471278572e-06, "loss": 0.6593, "step": 6688 }, { "epoch": 0.43, "grad_norm": 1.8033583095948458, "learning_rate": 6.388125693070452e-06, "loss": 0.707, "step": 6689 }, { "epoch": 0.43, "grad_norm": 1.4332914775882157, "learning_rate": 6.387129855203606e-06, "loss": 0.7163, "step": 6690 }, { "epoch": 0.43, "grad_norm": 1.6474473234456513, "learning_rate": 6.386133957720833e-06, "loss": 0.7251, "step": 6691 }, { "epoch": 0.43, "grad_norm": 1.8676648318216376, "learning_rate": 6.385138000664937e-06, "loss": 0.8375, "step": 6692 }, { "epoch": 0.43, "grad_norm": 1.485656668810982, "learning_rate": 6.384141984078719e-06, "loss": 0.7004, "step": 6693 }, { "epoch": 0.43, "grad_norm": 1.6266694283415182, "learning_rate": 6.3831459080049865e-06, "loss": 0.6714, "step": 6694 }, { "epoch": 0.43, "grad_norm": 1.6612967885643055, "learning_rate": 6.38214977248655e-06, "loss": 0.6925, "step": 6695 }, { "epoch": 0.43, "grad_norm": 1.5647356514112767, "learning_rate": 6.381153577566222e-06, "loss": 0.6925, "step": 6696 }, { "epoch": 0.43, "grad_norm": 1.4655469616318497, "learning_rate": 6.380157323286813e-06, "loss": 0.7322, "step": 6697 }, { "epoch": 0.43, "grad_norm": 1.1162189375011118, "learning_rate": 6.3791610096911435e-06, "loss": 0.7707, "step": 6698 }, { "epoch": 0.43, "grad_norm": 1.3839115276506768, "learning_rate": 6.378164636822033e-06, "loss": 0.6072, "step": 6699 }, { "epoch": 0.43, "grad_norm": 1.5702378411529145, "learning_rate": 6.3771682047223e-06, "loss": 0.6532, "step": 6700 }, { "epoch": 0.43, "grad_norm": 1.1887539485256677, "learning_rate": 6.376171713434771e-06, "loss": 0.6495, "step": 6701 }, { "epoch": 0.43, "grad_norm": 2.208362935049586, "learning_rate": 6.375175163002275e-06, "loss": 0.703, "step": 6702 }, { "epoch": 0.43, "grad_norm": 1.5250778319489005, "learning_rate": 6.3741785534676404e-06, "loss": 0.6611, "step": 6703 }, { "epoch": 0.43, "grad_norm": 1.521354984723688, "learning_rate": 6.373181884873699e-06, "loss": 0.6053, "step": 6704 }, { "epoch": 0.43, "grad_norm": 1.7213432049708632, "learning_rate": 6.372185157263287e-06, "loss": 0.6592, "step": 6705 }, { "epoch": 0.43, "grad_norm": 1.2442872249770107, "learning_rate": 6.3711883706792375e-06, "loss": 0.6148, "step": 6706 }, { "epoch": 0.43, "grad_norm": 1.437430554206742, "learning_rate": 6.370191525164394e-06, "loss": 0.6871, "step": 6707 }, { "epoch": 0.43, "grad_norm": 1.4693715480074594, "learning_rate": 6.369194620761598e-06, "loss": 0.6746, "step": 6708 }, { "epoch": 0.43, "grad_norm": 1.6116365308353873, "learning_rate": 6.368197657513695e-06, "loss": 0.7102, "step": 6709 }, { "epoch": 0.43, "grad_norm": 1.6772811753413326, "learning_rate": 6.367200635463531e-06, "loss": 0.6909, "step": 6710 }, { "epoch": 0.43, "grad_norm": 1.3732433619610442, "learning_rate": 6.366203554653957e-06, "loss": 0.6236, "step": 6711 }, { "epoch": 0.43, "grad_norm": 1.591827720641944, "learning_rate": 6.365206415127825e-06, "loss": 0.6238, "step": 6712 }, { "epoch": 0.43, "grad_norm": 1.0827326029288482, "learning_rate": 6.36420921692799e-06, "loss": 0.7246, "step": 6713 }, { "epoch": 0.43, "grad_norm": 1.664022893165805, "learning_rate": 6.363211960097309e-06, "loss": 0.7041, "step": 6714 }, { "epoch": 0.43, "grad_norm": 1.5658266733819202, "learning_rate": 6.362214644678641e-06, "loss": 0.7245, "step": 6715 }, { "epoch": 0.43, "grad_norm": 1.4398603166467048, "learning_rate": 6.361217270714854e-06, "loss": 0.7364, "step": 6716 }, { "epoch": 0.43, "grad_norm": 1.5182565379704471, "learning_rate": 6.360219838248806e-06, "loss": 0.6568, "step": 6717 }, { "epoch": 0.43, "grad_norm": 1.533468021163663, "learning_rate": 6.359222347323368e-06, "loss": 0.6427, "step": 6718 }, { "epoch": 0.43, "grad_norm": 1.7682252396969926, "learning_rate": 6.358224797981409e-06, "loss": 0.7078, "step": 6719 }, { "epoch": 0.43, "grad_norm": 1.4231986319678016, "learning_rate": 6.357227190265804e-06, "loss": 0.6364, "step": 6720 }, { "epoch": 0.43, "grad_norm": 1.6504663817991454, "learning_rate": 6.356229524219425e-06, "loss": 0.8157, "step": 6721 }, { "epoch": 0.43, "grad_norm": 2.042221405845588, "learning_rate": 6.355231799885151e-06, "loss": 0.6833, "step": 6722 }, { "epoch": 0.43, "grad_norm": 1.61964590246847, "learning_rate": 6.354234017305863e-06, "loss": 0.6465, "step": 6723 }, { "epoch": 0.43, "grad_norm": 1.7489106180311258, "learning_rate": 6.353236176524441e-06, "loss": 0.594, "step": 6724 }, { "epoch": 0.43, "grad_norm": 1.8349905238278774, "learning_rate": 6.352238277583773e-06, "loss": 0.7427, "step": 6725 }, { "epoch": 0.43, "grad_norm": 1.7723555649333236, "learning_rate": 6.351240320526744e-06, "loss": 0.7454, "step": 6726 }, { "epoch": 0.43, "grad_norm": 1.2485177984103382, "learning_rate": 6.350242305396247e-06, "loss": 0.7297, "step": 6727 }, { "epoch": 0.43, "grad_norm": 1.556244392036679, "learning_rate": 6.349244232235172e-06, "loss": 0.6631, "step": 6728 }, { "epoch": 0.43, "grad_norm": 1.4175903556752794, "learning_rate": 6.348246101086414e-06, "loss": 0.5956, "step": 6729 }, { "epoch": 0.43, "grad_norm": 1.5377578291934673, "learning_rate": 6.347247911992873e-06, "loss": 0.7559, "step": 6730 }, { "epoch": 0.43, "grad_norm": 1.152370130659103, "learning_rate": 6.346249664997448e-06, "loss": 0.707, "step": 6731 }, { "epoch": 0.43, "grad_norm": 1.5902658509810468, "learning_rate": 6.345251360143041e-06, "loss": 0.5855, "step": 6732 }, { "epoch": 0.43, "grad_norm": 1.7783679633692544, "learning_rate": 6.344252997472556e-06, "loss": 0.796, "step": 6733 }, { "epoch": 0.43, "grad_norm": 1.5066034787354672, "learning_rate": 6.343254577028903e-06, "loss": 0.6569, "step": 6734 }, { "epoch": 0.43, "grad_norm": 1.6604030913466554, "learning_rate": 6.342256098854992e-06, "loss": 0.7362, "step": 6735 }, { "epoch": 0.43, "grad_norm": 1.5224141684026304, "learning_rate": 6.341257562993732e-06, "loss": 0.6367, "step": 6736 }, { "epoch": 0.43, "grad_norm": 1.6098365146343603, "learning_rate": 6.34025896948804e-06, "loss": 0.7403, "step": 6737 }, { "epoch": 0.43, "grad_norm": 1.6489439120975191, "learning_rate": 6.339260318380835e-06, "loss": 0.7193, "step": 6738 }, { "epoch": 0.43, "grad_norm": 2.0959380987153073, "learning_rate": 6.338261609715037e-06, "loss": 0.6544, "step": 6739 }, { "epoch": 0.43, "grad_norm": 1.7595681228738558, "learning_rate": 6.337262843533566e-06, "loss": 0.7124, "step": 6740 }, { "epoch": 0.43, "grad_norm": 6.539660660749554, "learning_rate": 6.336264019879348e-06, "loss": 0.6272, "step": 6741 }, { "epoch": 0.43, "grad_norm": 1.5835926412638919, "learning_rate": 6.33526513879531e-06, "loss": 0.6281, "step": 6742 }, { "epoch": 0.43, "grad_norm": 1.5576710028426901, "learning_rate": 6.334266200324381e-06, "loss": 0.6963, "step": 6743 }, { "epoch": 0.43, "grad_norm": 1.5237647906450125, "learning_rate": 6.333267204509497e-06, "loss": 0.7037, "step": 6744 }, { "epoch": 0.43, "grad_norm": 1.5695756203733593, "learning_rate": 6.332268151393589e-06, "loss": 0.766, "step": 6745 }, { "epoch": 0.43, "grad_norm": 1.429766839516369, "learning_rate": 6.331269041019596e-06, "loss": 0.6568, "step": 6746 }, { "epoch": 0.43, "grad_norm": 1.7500008446951179, "learning_rate": 6.330269873430455e-06, "loss": 0.749, "step": 6747 }, { "epoch": 0.43, "grad_norm": 1.466207377851263, "learning_rate": 6.329270648669111e-06, "loss": 0.6556, "step": 6748 }, { "epoch": 0.43, "grad_norm": 1.2231127380835611, "learning_rate": 6.3282713667785086e-06, "loss": 0.6507, "step": 6749 }, { "epoch": 0.43, "grad_norm": 1.248071189021218, "learning_rate": 6.327272027801592e-06, "loss": 0.6889, "step": 6750 }, { "epoch": 0.43, "grad_norm": 1.5591046025192319, "learning_rate": 6.326272631781314e-06, "loss": 0.6366, "step": 6751 }, { "epoch": 0.43, "grad_norm": 1.348429240281959, "learning_rate": 6.3252731787606256e-06, "loss": 0.6343, "step": 6752 }, { "epoch": 0.43, "grad_norm": 1.5776323744732135, "learning_rate": 6.32427366878248e-06, "loss": 0.776, "step": 6753 }, { "epoch": 0.43, "grad_norm": 1.5813918734984802, "learning_rate": 6.323274101889836e-06, "loss": 0.6586, "step": 6754 }, { "epoch": 0.43, "grad_norm": 1.744345448894029, "learning_rate": 6.322274478125651e-06, "loss": 0.8225, "step": 6755 }, { "epoch": 0.43, "grad_norm": 1.3097466583630808, "learning_rate": 6.321274797532886e-06, "loss": 0.7522, "step": 6756 }, { "epoch": 0.43, "grad_norm": 1.5539718421927673, "learning_rate": 6.320275060154508e-06, "loss": 0.754, "step": 6757 }, { "epoch": 0.43, "grad_norm": 1.432714321924012, "learning_rate": 6.319275266033481e-06, "loss": 0.685, "step": 6758 }, { "epoch": 0.43, "grad_norm": 1.3557074815988333, "learning_rate": 6.318275415212777e-06, "loss": 0.5028, "step": 6759 }, { "epoch": 0.43, "grad_norm": 1.532924065776337, "learning_rate": 6.317275507735364e-06, "loss": 0.6429, "step": 6760 }, { "epoch": 0.43, "grad_norm": 1.4919980312884422, "learning_rate": 6.316275543644219e-06, "loss": 0.6717, "step": 6761 }, { "epoch": 0.43, "grad_norm": 1.3761274195228632, "learning_rate": 6.315275522982317e-06, "loss": 0.6431, "step": 6762 }, { "epoch": 0.43, "grad_norm": 1.584162963268015, "learning_rate": 6.314275445792637e-06, "loss": 0.7813, "step": 6763 }, { "epoch": 0.43, "grad_norm": 1.4278365334398875, "learning_rate": 6.313275312118159e-06, "loss": 0.6827, "step": 6764 }, { "epoch": 0.43, "grad_norm": 1.6432792435765853, "learning_rate": 6.312275122001867e-06, "loss": 0.6465, "step": 6765 }, { "epoch": 0.43, "grad_norm": 1.3498331275972, "learning_rate": 6.311274875486748e-06, "loss": 0.7311, "step": 6766 }, { "epoch": 0.43, "grad_norm": 1.666474272121294, "learning_rate": 6.310274572615792e-06, "loss": 0.6914, "step": 6767 }, { "epoch": 0.43, "grad_norm": 1.1823317188704485, "learning_rate": 6.309274213431987e-06, "loss": 0.6233, "step": 6768 }, { "epoch": 0.43, "grad_norm": 1.2200680261894303, "learning_rate": 6.308273797978328e-06, "loss": 0.7443, "step": 6769 }, { "epoch": 0.43, "grad_norm": 1.7024269656164084, "learning_rate": 6.307273326297811e-06, "loss": 0.7437, "step": 6770 }, { "epoch": 0.43, "grad_norm": 1.6465149759276114, "learning_rate": 6.30627279843343e-06, "loss": 0.6801, "step": 6771 }, { "epoch": 0.43, "grad_norm": 1.5619418316628386, "learning_rate": 6.305272214428192e-06, "loss": 0.6763, "step": 6772 }, { "epoch": 0.43, "grad_norm": 1.6147791265464315, "learning_rate": 6.304271574325096e-06, "loss": 0.7157, "step": 6773 }, { "epoch": 0.43, "grad_norm": 1.7582774629627678, "learning_rate": 6.303270878167148e-06, "loss": 0.6706, "step": 6774 }, { "epoch": 0.43, "grad_norm": 1.3884015603375641, "learning_rate": 6.3022701259973565e-06, "loss": 0.6146, "step": 6775 }, { "epoch": 0.43, "grad_norm": 1.7153988708121344, "learning_rate": 6.301269317858733e-06, "loss": 0.6247, "step": 6776 }, { "epoch": 0.43, "grad_norm": 1.5602931738641135, "learning_rate": 6.300268453794287e-06, "loss": 0.7453, "step": 6777 }, { "epoch": 0.43, "grad_norm": 1.4019728753699932, "learning_rate": 6.299267533847035e-06, "loss": 0.7101, "step": 6778 }, { "epoch": 0.43, "grad_norm": 1.697289819291588, "learning_rate": 6.298266558059995e-06, "loss": 0.7084, "step": 6779 }, { "epoch": 0.43, "grad_norm": 1.5314380846011688, "learning_rate": 6.297265526476186e-06, "loss": 0.7247, "step": 6780 }, { "epoch": 0.43, "grad_norm": 1.4037618901184041, "learning_rate": 6.296264439138631e-06, "loss": 0.73, "step": 6781 }, { "epoch": 0.43, "grad_norm": 1.4943919545914368, "learning_rate": 6.295263296090355e-06, "loss": 0.747, "step": 6782 }, { "epoch": 0.43, "grad_norm": 1.6553046789177457, "learning_rate": 6.294262097374383e-06, "loss": 0.7658, "step": 6783 }, { "epoch": 0.43, "grad_norm": 1.6033284751442383, "learning_rate": 6.293260843033745e-06, "loss": 0.7699, "step": 6784 }, { "epoch": 0.43, "grad_norm": 1.4580651877349358, "learning_rate": 6.292259533111474e-06, "loss": 0.6427, "step": 6785 }, { "epoch": 0.43, "grad_norm": 1.467495458112799, "learning_rate": 6.291258167650605e-06, "loss": 0.6463, "step": 6786 }, { "epoch": 0.43, "grad_norm": 1.4274724727715256, "learning_rate": 6.2902567466941725e-06, "loss": 0.6479, "step": 6787 }, { "epoch": 0.43, "grad_norm": 1.5228596991673609, "learning_rate": 6.289255270285215e-06, "loss": 0.6659, "step": 6788 }, { "epoch": 0.43, "grad_norm": 1.4874943731581998, "learning_rate": 6.288253738466777e-06, "loss": 0.7007, "step": 6789 }, { "epoch": 0.43, "grad_norm": 1.6795836741004733, "learning_rate": 6.2872521512819e-06, "loss": 0.7744, "step": 6790 }, { "epoch": 0.43, "grad_norm": 1.4491270330983652, "learning_rate": 6.286250508773631e-06, "loss": 0.6288, "step": 6791 }, { "epoch": 0.43, "grad_norm": 1.418610369018316, "learning_rate": 6.285248810985015e-06, "loss": 0.6752, "step": 6792 }, { "epoch": 0.43, "grad_norm": 1.7682495931990938, "learning_rate": 6.284247057959107e-06, "loss": 0.7121, "step": 6793 }, { "epoch": 0.43, "grad_norm": 1.4067999941360974, "learning_rate": 6.28324524973896e-06, "loss": 0.6115, "step": 6794 }, { "epoch": 0.43, "grad_norm": 1.606615890343347, "learning_rate": 6.282243386367628e-06, "loss": 0.6548, "step": 6795 }, { "epoch": 0.43, "grad_norm": 1.5271722039841715, "learning_rate": 6.281241467888171e-06, "loss": 0.7688, "step": 6796 }, { "epoch": 0.44, "grad_norm": 1.2825424435953012, "learning_rate": 6.280239494343647e-06, "loss": 0.7077, "step": 6797 }, { "epoch": 0.44, "grad_norm": 1.4512314305776957, "learning_rate": 6.2792374657771195e-06, "loss": 0.6583, "step": 6798 }, { "epoch": 0.44, "grad_norm": 1.6012470669609231, "learning_rate": 6.278235382231654e-06, "loss": 0.7063, "step": 6799 }, { "epoch": 0.44, "grad_norm": 1.5858977052781156, "learning_rate": 6.277233243750317e-06, "loss": 0.6015, "step": 6800 }, { "epoch": 0.44, "grad_norm": 1.4701292019388708, "learning_rate": 6.27623105037618e-06, "loss": 0.6984, "step": 6801 }, { "epoch": 0.44, "grad_norm": 1.5293625763070098, "learning_rate": 6.275228802152313e-06, "loss": 0.6477, "step": 6802 }, { "epoch": 0.44, "grad_norm": 1.1483144442399105, "learning_rate": 6.274226499121793e-06, "loss": 0.5678, "step": 6803 }, { "epoch": 0.44, "grad_norm": 1.4529303075806836, "learning_rate": 6.273224141327695e-06, "loss": 0.6319, "step": 6804 }, { "epoch": 0.44, "grad_norm": 1.492092945035294, "learning_rate": 6.272221728813099e-06, "loss": 0.7812, "step": 6805 }, { "epoch": 0.44, "grad_norm": 1.547799339439244, "learning_rate": 6.2712192616210866e-06, "loss": 0.6676, "step": 6806 }, { "epoch": 0.44, "grad_norm": 1.4499193489713436, "learning_rate": 6.270216739794741e-06, "loss": 0.808, "step": 6807 }, { "epoch": 0.44, "grad_norm": 1.4577594018688167, "learning_rate": 6.26921416337715e-06, "loss": 0.6752, "step": 6808 }, { "epoch": 0.44, "grad_norm": 1.9394400675859194, "learning_rate": 6.2682115324114e-06, "loss": 0.6617, "step": 6809 }, { "epoch": 0.44, "grad_norm": 1.6559717195436892, "learning_rate": 6.267208846940584e-06, "loss": 0.6218, "step": 6810 }, { "epoch": 0.44, "grad_norm": 1.7240962660779144, "learning_rate": 6.266206107007793e-06, "loss": 0.6313, "step": 6811 }, { "epoch": 0.44, "grad_norm": 1.7316541061642106, "learning_rate": 6.265203312656126e-06, "loss": 0.7264, "step": 6812 }, { "epoch": 0.44, "grad_norm": 1.4394207808333053, "learning_rate": 6.264200463928679e-06, "loss": 0.6711, "step": 6813 }, { "epoch": 0.44, "grad_norm": 1.5593848541532922, "learning_rate": 6.263197560868553e-06, "loss": 0.6696, "step": 6814 }, { "epoch": 0.44, "grad_norm": 1.5246958530687187, "learning_rate": 6.2621946035188474e-06, "loss": 0.647, "step": 6815 }, { "epoch": 0.44, "grad_norm": 1.4932296240170737, "learning_rate": 6.261191591922673e-06, "loss": 0.7773, "step": 6816 }, { "epoch": 0.44, "grad_norm": 1.7398748694217, "learning_rate": 6.260188526123133e-06, "loss": 0.6577, "step": 6817 }, { "epoch": 0.44, "grad_norm": 1.593348895405505, "learning_rate": 6.259185406163338e-06, "loss": 0.6328, "step": 6818 }, { "epoch": 0.44, "grad_norm": 1.7502103316500144, "learning_rate": 6.2581822320864e-06, "loss": 0.7342, "step": 6819 }, { "epoch": 0.44, "grad_norm": 1.457026278591762, "learning_rate": 6.257179003935435e-06, "loss": 0.6672, "step": 6820 }, { "epoch": 0.44, "grad_norm": 1.5923966031437797, "learning_rate": 6.256175721753556e-06, "loss": 0.6526, "step": 6821 }, { "epoch": 0.44, "grad_norm": 1.6097477097637467, "learning_rate": 6.255172385583884e-06, "loss": 0.6682, "step": 6822 }, { "epoch": 0.44, "grad_norm": 1.7415599329396652, "learning_rate": 6.254168995469541e-06, "loss": 0.7372, "step": 6823 }, { "epoch": 0.44, "grad_norm": 1.1346693774200836, "learning_rate": 6.253165551453652e-06, "loss": 0.7038, "step": 6824 }, { "epoch": 0.44, "grad_norm": 1.5447011559506676, "learning_rate": 6.252162053579338e-06, "loss": 0.6523, "step": 6825 }, { "epoch": 0.44, "grad_norm": 1.4620766431369916, "learning_rate": 6.251158501889732e-06, "loss": 0.7518, "step": 6826 }, { "epoch": 0.44, "grad_norm": 1.6058566125314617, "learning_rate": 6.250154896427962e-06, "loss": 0.5785, "step": 6827 }, { "epoch": 0.44, "grad_norm": 1.34833769583943, "learning_rate": 6.249151237237161e-06, "loss": 0.6706, "step": 6828 }, { "epoch": 0.44, "grad_norm": 1.5019998971538615, "learning_rate": 6.2481475243604654e-06, "loss": 0.7043, "step": 6829 }, { "epoch": 0.44, "grad_norm": 1.1488322413393945, "learning_rate": 6.24714375784101e-06, "loss": 0.6746, "step": 6830 }, { "epoch": 0.44, "grad_norm": 1.5791390501523273, "learning_rate": 6.246139937721939e-06, "loss": 0.677, "step": 6831 }, { "epoch": 0.44, "grad_norm": 1.588094100889139, "learning_rate": 6.245136064046391e-06, "loss": 0.6513, "step": 6832 }, { "epoch": 0.44, "grad_norm": 1.5213278178629752, "learning_rate": 6.244132136857511e-06, "loss": 0.6719, "step": 6833 }, { "epoch": 0.44, "grad_norm": 1.4859506843241708, "learning_rate": 6.243128156198447e-06, "loss": 0.8111, "step": 6834 }, { "epoch": 0.44, "grad_norm": 1.5927658338211068, "learning_rate": 6.242124122112347e-06, "loss": 0.6538, "step": 6835 }, { "epoch": 0.44, "grad_norm": 1.1930833704553063, "learning_rate": 6.241120034642361e-06, "loss": 0.626, "step": 6836 }, { "epoch": 0.44, "grad_norm": 1.6835965589642423, "learning_rate": 6.240115893831644e-06, "loss": 0.7818, "step": 6837 }, { "epoch": 0.44, "grad_norm": 1.6421809112560077, "learning_rate": 6.239111699723353e-06, "loss": 0.7163, "step": 6838 }, { "epoch": 0.44, "grad_norm": 1.3706609404147183, "learning_rate": 6.238107452360643e-06, "loss": 0.5873, "step": 6839 }, { "epoch": 0.44, "grad_norm": 2.3427137152006114, "learning_rate": 6.2371031517866785e-06, "loss": 0.6911, "step": 6840 }, { "epoch": 0.44, "grad_norm": 1.7581735014697863, "learning_rate": 6.236098798044619e-06, "loss": 0.7615, "step": 6841 }, { "epoch": 0.44, "grad_norm": 1.5768159594407698, "learning_rate": 6.235094391177631e-06, "loss": 0.696, "step": 6842 }, { "epoch": 0.44, "grad_norm": 1.5542771142307674, "learning_rate": 6.2340899312288795e-06, "loss": 0.6454, "step": 6843 }, { "epoch": 0.44, "grad_norm": 1.8487622118665026, "learning_rate": 6.233085418241538e-06, "loss": 0.6368, "step": 6844 }, { "epoch": 0.44, "grad_norm": 1.6689911272379625, "learning_rate": 6.232080852258776e-06, "loss": 0.5826, "step": 6845 }, { "epoch": 0.44, "grad_norm": 1.65079836980018, "learning_rate": 6.231076233323767e-06, "loss": 0.7524, "step": 6846 }, { "epoch": 0.44, "grad_norm": 1.5974760156788157, "learning_rate": 6.23007156147969e-06, "loss": 0.6915, "step": 6847 }, { "epoch": 0.44, "grad_norm": 1.3854718879470933, "learning_rate": 6.229066836769721e-06, "loss": 0.6245, "step": 6848 }, { "epoch": 0.44, "grad_norm": 1.1974797892661846, "learning_rate": 6.228062059237041e-06, "loss": 0.5904, "step": 6849 }, { "epoch": 0.44, "grad_norm": 1.5243498032184295, "learning_rate": 6.227057228924836e-06, "loss": 0.6198, "step": 6850 }, { "epoch": 0.44, "grad_norm": 2.945794747692917, "learning_rate": 6.226052345876288e-06, "loss": 0.7258, "step": 6851 }, { "epoch": 0.44, "grad_norm": 1.781843812868166, "learning_rate": 6.225047410134588e-06, "loss": 0.7202, "step": 6852 }, { "epoch": 0.44, "grad_norm": 1.6244572215044104, "learning_rate": 6.224042421742924e-06, "loss": 0.6679, "step": 6853 }, { "epoch": 0.44, "grad_norm": 1.424995088723755, "learning_rate": 6.223037380744489e-06, "loss": 0.6597, "step": 6854 }, { "epoch": 0.44, "grad_norm": 1.723592647043526, "learning_rate": 6.222032287182477e-06, "loss": 0.6205, "step": 6855 }, { "epoch": 0.44, "grad_norm": 1.4880723416343864, "learning_rate": 6.221027141100084e-06, "loss": 0.6976, "step": 6856 }, { "epoch": 0.44, "grad_norm": 1.411673067739276, "learning_rate": 6.220021942540512e-06, "loss": 0.6615, "step": 6857 }, { "epoch": 0.44, "grad_norm": 1.4740334608090901, "learning_rate": 6.2190166915469586e-06, "loss": 0.6738, "step": 6858 }, { "epoch": 0.44, "grad_norm": 1.232387748901282, "learning_rate": 6.21801138816263e-06, "loss": 0.6528, "step": 6859 }, { "epoch": 0.44, "grad_norm": 1.1089655115475034, "learning_rate": 6.217006032430732e-06, "loss": 0.602, "step": 6860 }, { "epoch": 0.44, "grad_norm": 1.5409128248917046, "learning_rate": 6.2160006243944726e-06, "loss": 0.6949, "step": 6861 }, { "epoch": 0.44, "grad_norm": 1.5668528247182714, "learning_rate": 6.214995164097062e-06, "loss": 0.7822, "step": 6862 }, { "epoch": 0.44, "grad_norm": 1.5347644240685314, "learning_rate": 6.213989651581711e-06, "loss": 0.6466, "step": 6863 }, { "epoch": 0.44, "grad_norm": 1.4410347814973197, "learning_rate": 6.212984086891635e-06, "loss": 0.7169, "step": 6864 }, { "epoch": 0.44, "grad_norm": 1.6414463811275573, "learning_rate": 6.211978470070052e-06, "loss": 0.7054, "step": 6865 }, { "epoch": 0.44, "grad_norm": 1.636537740205523, "learning_rate": 6.210972801160182e-06, "loss": 0.7705, "step": 6866 }, { "epoch": 0.44, "grad_norm": 1.5114943588270073, "learning_rate": 6.209967080205244e-06, "loss": 0.6114, "step": 6867 }, { "epoch": 0.44, "grad_norm": 1.7168306207212092, "learning_rate": 6.208961307248466e-06, "loss": 0.7158, "step": 6868 }, { "epoch": 0.44, "grad_norm": 1.7887564793709274, "learning_rate": 6.20795548233307e-06, "loss": 0.7062, "step": 6869 }, { "epoch": 0.44, "grad_norm": 1.4871965745921076, "learning_rate": 6.206949605502286e-06, "loss": 0.6224, "step": 6870 }, { "epoch": 0.44, "grad_norm": 1.4909552736071, "learning_rate": 6.205943676799344e-06, "loss": 0.7277, "step": 6871 }, { "epoch": 0.44, "grad_norm": 1.8268589417176915, "learning_rate": 6.204937696267475e-06, "loss": 0.71, "step": 6872 }, { "epoch": 0.44, "grad_norm": 1.244054448913078, "learning_rate": 6.203931663949918e-06, "loss": 0.5962, "step": 6873 }, { "epoch": 0.44, "grad_norm": 1.2748031536376934, "learning_rate": 6.202925579889908e-06, "loss": 0.6964, "step": 6874 }, { "epoch": 0.44, "grad_norm": 1.6296631299905497, "learning_rate": 6.201919444130684e-06, "loss": 0.6833, "step": 6875 }, { "epoch": 0.44, "grad_norm": 1.2322342694574064, "learning_rate": 6.200913256715486e-06, "loss": 0.66, "step": 6876 }, { "epoch": 0.44, "grad_norm": 1.5996266440760618, "learning_rate": 6.199907017687562e-06, "loss": 0.7352, "step": 6877 }, { "epoch": 0.44, "grad_norm": 1.0642394669923534, "learning_rate": 6.198900727090155e-06, "loss": 0.6477, "step": 6878 }, { "epoch": 0.44, "grad_norm": 1.8093759624282186, "learning_rate": 6.197894384966513e-06, "loss": 0.6824, "step": 6879 }, { "epoch": 0.44, "grad_norm": 1.6062188488675944, "learning_rate": 6.1968879913598874e-06, "loss": 0.6891, "step": 6880 }, { "epoch": 0.44, "grad_norm": 1.5145399154906274, "learning_rate": 6.195881546313533e-06, "loss": 0.6815, "step": 6881 }, { "epoch": 0.44, "grad_norm": 1.5583554737740577, "learning_rate": 6.194875049870701e-06, "loss": 0.6958, "step": 6882 }, { "epoch": 0.44, "grad_norm": 1.7446479491407363, "learning_rate": 6.193868502074651e-06, "loss": 0.7183, "step": 6883 }, { "epoch": 0.44, "grad_norm": 1.158472878288066, "learning_rate": 6.192861902968641e-06, "loss": 0.6251, "step": 6884 }, { "epoch": 0.44, "grad_norm": 1.560234648285288, "learning_rate": 6.191855252595933e-06, "loss": 0.7703, "step": 6885 }, { "epoch": 0.44, "grad_norm": 1.8265653665606707, "learning_rate": 6.1908485509997905e-06, "loss": 0.7033, "step": 6886 }, { "epoch": 0.44, "grad_norm": 1.6961239022099002, "learning_rate": 6.189841798223479e-06, "loss": 0.7036, "step": 6887 }, { "epoch": 0.44, "grad_norm": 1.4656776944852674, "learning_rate": 6.188834994310268e-06, "loss": 0.6482, "step": 6888 }, { "epoch": 0.44, "grad_norm": 1.8409640723975582, "learning_rate": 6.1878281393034275e-06, "loss": 0.654, "step": 6889 }, { "epoch": 0.44, "grad_norm": 1.6496453079537892, "learning_rate": 6.18682123324623e-06, "loss": 0.6409, "step": 6890 }, { "epoch": 0.44, "grad_norm": 1.4818526070838214, "learning_rate": 6.1858142761819484e-06, "loss": 0.6574, "step": 6891 }, { "epoch": 0.44, "grad_norm": 1.5595679733449326, "learning_rate": 6.184807268153862e-06, "loss": 0.6731, "step": 6892 }, { "epoch": 0.44, "grad_norm": 1.5105633288219633, "learning_rate": 6.1838002092052465e-06, "loss": 0.6193, "step": 6893 }, { "epoch": 0.44, "grad_norm": 1.701165618386437, "learning_rate": 6.182793099379387e-06, "loss": 0.6517, "step": 6894 }, { "epoch": 0.44, "grad_norm": 1.7951651733258696, "learning_rate": 6.181785938719566e-06, "loss": 0.7263, "step": 6895 }, { "epoch": 0.44, "grad_norm": 1.5136813106825624, "learning_rate": 6.180778727269067e-06, "loss": 0.7177, "step": 6896 }, { "epoch": 0.44, "grad_norm": 1.6138727877075503, "learning_rate": 6.179771465071182e-06, "loss": 0.7676, "step": 6897 }, { "epoch": 0.44, "grad_norm": 1.5404169724048655, "learning_rate": 6.178764152169198e-06, "loss": 0.7966, "step": 6898 }, { "epoch": 0.44, "grad_norm": 1.6016507701281992, "learning_rate": 6.177756788606406e-06, "loss": 0.622, "step": 6899 }, { "epoch": 0.44, "grad_norm": 1.8312499713521522, "learning_rate": 6.176749374426103e-06, "loss": 0.6634, "step": 6900 }, { "epoch": 0.44, "grad_norm": 1.3743280785376348, "learning_rate": 6.175741909671584e-06, "loss": 0.6908, "step": 6901 }, { "epoch": 0.44, "grad_norm": 1.3246022766714154, "learning_rate": 6.174734394386149e-06, "loss": 0.6784, "step": 6902 }, { "epoch": 0.44, "grad_norm": 1.4610490013498842, "learning_rate": 6.173726828613098e-06, "loss": 0.6732, "step": 6903 }, { "epoch": 0.44, "grad_norm": 1.4567990075408623, "learning_rate": 6.172719212395734e-06, "loss": 0.6935, "step": 6904 }, { "epoch": 0.44, "grad_norm": 1.5954382228544732, "learning_rate": 6.171711545777363e-06, "loss": 0.6786, "step": 6905 }, { "epoch": 0.44, "grad_norm": 1.4851844475862879, "learning_rate": 6.170703828801292e-06, "loss": 0.7408, "step": 6906 }, { "epoch": 0.44, "grad_norm": 1.2912030209222947, "learning_rate": 6.169696061510831e-06, "loss": 0.6282, "step": 6907 }, { "epoch": 0.44, "grad_norm": 1.4938301175333124, "learning_rate": 6.168688243949288e-06, "loss": 0.7601, "step": 6908 }, { "epoch": 0.44, "grad_norm": 1.7146356555526723, "learning_rate": 6.167680376159983e-06, "loss": 0.7394, "step": 6909 }, { "epoch": 0.44, "grad_norm": 1.788456236495566, "learning_rate": 6.166672458186228e-06, "loss": 0.7954, "step": 6910 }, { "epoch": 0.44, "grad_norm": 1.189432772283659, "learning_rate": 6.165664490071343e-06, "loss": 0.6723, "step": 6911 }, { "epoch": 0.44, "grad_norm": 1.4964133007301499, "learning_rate": 6.164656471858648e-06, "loss": 0.62, "step": 6912 }, { "epoch": 0.44, "grad_norm": 1.6354387774851367, "learning_rate": 6.163648403591462e-06, "loss": 0.6177, "step": 6913 }, { "epoch": 0.44, "grad_norm": 1.7564461158515838, "learning_rate": 6.162640285313116e-06, "loss": 0.6306, "step": 6914 }, { "epoch": 0.44, "grad_norm": 1.0490308778213084, "learning_rate": 6.161632117066932e-06, "loss": 0.6741, "step": 6915 }, { "epoch": 0.44, "grad_norm": 0.953796495548119, "learning_rate": 6.1606238988962405e-06, "loss": 0.6248, "step": 6916 }, { "epoch": 0.44, "grad_norm": 1.4545955570686078, "learning_rate": 6.1596156308443746e-06, "loss": 0.7082, "step": 6917 }, { "epoch": 0.44, "grad_norm": 1.3923519058049503, "learning_rate": 6.158607312954664e-06, "loss": 0.6976, "step": 6918 }, { "epoch": 0.44, "grad_norm": 1.4359162354267712, "learning_rate": 6.157598945270447e-06, "loss": 0.7227, "step": 6919 }, { "epoch": 0.44, "grad_norm": 1.804166288537071, "learning_rate": 6.156590527835058e-06, "loss": 0.7986, "step": 6920 }, { "epoch": 0.44, "grad_norm": 1.8843267111936182, "learning_rate": 6.1555820606918384e-06, "loss": 0.6217, "step": 6921 }, { "epoch": 0.44, "grad_norm": 1.543929555038544, "learning_rate": 6.15457354388413e-06, "loss": 0.7226, "step": 6922 }, { "epoch": 0.44, "grad_norm": 1.5664751887190806, "learning_rate": 6.153564977455278e-06, "loss": 0.6803, "step": 6923 }, { "epoch": 0.44, "grad_norm": 1.6978898466513053, "learning_rate": 6.152556361448627e-06, "loss": 0.6617, "step": 6924 }, { "epoch": 0.44, "grad_norm": 1.6041683519238046, "learning_rate": 6.151547695907525e-06, "loss": 0.718, "step": 6925 }, { "epoch": 0.44, "grad_norm": 1.4524172965807214, "learning_rate": 6.150538980875323e-06, "loss": 0.6663, "step": 6926 }, { "epoch": 0.44, "grad_norm": 1.7489994678092564, "learning_rate": 6.149530216395374e-06, "loss": 0.743, "step": 6927 }, { "epoch": 0.44, "grad_norm": 1.0314879150248646, "learning_rate": 6.148521402511031e-06, "loss": 0.6557, "step": 6928 }, { "epoch": 0.44, "grad_norm": 1.0195773839948432, "learning_rate": 6.1475125392656506e-06, "loss": 0.6727, "step": 6929 }, { "epoch": 0.44, "grad_norm": 1.4432923445890187, "learning_rate": 6.146503626702593e-06, "loss": 0.7396, "step": 6930 }, { "epoch": 0.44, "grad_norm": 1.5157881888617821, "learning_rate": 6.1454946648652204e-06, "loss": 0.688, "step": 6931 }, { "epoch": 0.44, "grad_norm": 1.5429428856715293, "learning_rate": 6.144485653796891e-06, "loss": 0.6868, "step": 6932 }, { "epoch": 0.44, "grad_norm": 1.2705327135155458, "learning_rate": 6.143476593540976e-06, "loss": 0.657, "step": 6933 }, { "epoch": 0.44, "grad_norm": 1.6572039470006719, "learning_rate": 6.142467484140838e-06, "loss": 0.6814, "step": 6934 }, { "epoch": 0.44, "grad_norm": 1.7564762009401336, "learning_rate": 6.1414583256398494e-06, "loss": 0.6419, "step": 6935 }, { "epoch": 0.44, "grad_norm": 1.775756281502193, "learning_rate": 6.14044911808138e-06, "loss": 0.6231, "step": 6936 }, { "epoch": 0.44, "grad_norm": 1.5707340902568456, "learning_rate": 6.139439861508804e-06, "loss": 0.7319, "step": 6937 }, { "epoch": 0.44, "grad_norm": 1.6169293863440282, "learning_rate": 6.138430555965497e-06, "loss": 0.6834, "step": 6938 }, { "epoch": 0.44, "grad_norm": 1.127145619441897, "learning_rate": 6.137421201494837e-06, "loss": 0.6449, "step": 6939 }, { "epoch": 0.44, "grad_norm": 1.0485643403627083, "learning_rate": 6.1364117981402035e-06, "loss": 0.5995, "step": 6940 }, { "epoch": 0.44, "grad_norm": 1.5152381305012288, "learning_rate": 6.135402345944979e-06, "loss": 0.5786, "step": 6941 }, { "epoch": 0.44, "grad_norm": 1.4607254614129745, "learning_rate": 6.134392844952547e-06, "loss": 0.7207, "step": 6942 }, { "epoch": 0.44, "grad_norm": 1.616074419994341, "learning_rate": 6.1333832952062945e-06, "loss": 0.7104, "step": 6943 }, { "epoch": 0.44, "grad_norm": 1.6520173717906357, "learning_rate": 6.132373696749609e-06, "loss": 0.6812, "step": 6944 }, { "epoch": 0.44, "grad_norm": 2.1654209969027565, "learning_rate": 6.1313640496258834e-06, "loss": 0.7657, "step": 6945 }, { "epoch": 0.44, "grad_norm": 1.545517729637975, "learning_rate": 6.130354353878507e-06, "loss": 0.6611, "step": 6946 }, { "epoch": 0.44, "grad_norm": 1.5563896836947835, "learning_rate": 6.129344609550876e-06, "loss": 0.6862, "step": 6947 }, { "epoch": 0.44, "grad_norm": 1.960051832430384, "learning_rate": 6.128334816686387e-06, "loss": 0.7258, "step": 6948 }, { "epoch": 0.44, "grad_norm": 1.6382894573061213, "learning_rate": 6.127324975328437e-06, "loss": 0.603, "step": 6949 }, { "epoch": 0.44, "grad_norm": 1.4945978244565645, "learning_rate": 6.1263150855204286e-06, "loss": 0.7176, "step": 6950 }, { "epoch": 0.44, "grad_norm": 1.4867919300025136, "learning_rate": 6.125305147305764e-06, "loss": 0.6795, "step": 6951 }, { "epoch": 0.44, "grad_norm": 1.6323697999469162, "learning_rate": 6.124295160727851e-06, "loss": 0.7049, "step": 6952 }, { "epoch": 0.45, "grad_norm": 1.5211798583715324, "learning_rate": 6.1232851258300944e-06, "loss": 0.7112, "step": 6953 }, { "epoch": 0.45, "grad_norm": 1.5641393036139508, "learning_rate": 6.122275042655902e-06, "loss": 0.6776, "step": 6954 }, { "epoch": 0.45, "grad_norm": 4.336318484905224, "learning_rate": 6.121264911248688e-06, "loss": 0.6794, "step": 6955 }, { "epoch": 0.45, "grad_norm": 1.5319736514244349, "learning_rate": 6.120254731651864e-06, "loss": 0.7292, "step": 6956 }, { "epoch": 0.45, "grad_norm": 1.3179646065861508, "learning_rate": 6.1192445039088435e-06, "loss": 0.7068, "step": 6957 }, { "epoch": 0.45, "grad_norm": 1.5715784714968781, "learning_rate": 6.1182342280630466e-06, "loss": 0.7174, "step": 6958 }, { "epoch": 0.45, "grad_norm": 1.3500357049530858, "learning_rate": 6.117223904157893e-06, "loss": 0.6842, "step": 6959 }, { "epoch": 0.45, "grad_norm": 1.5336454892859313, "learning_rate": 6.1162135322368045e-06, "loss": 0.6996, "step": 6960 }, { "epoch": 0.45, "grad_norm": 1.2416884974369025, "learning_rate": 6.115203112343203e-06, "loss": 0.7289, "step": 6961 }, { "epoch": 0.45, "grad_norm": 1.720433878031197, "learning_rate": 6.114192644520516e-06, "loss": 0.661, "step": 6962 }, { "epoch": 0.45, "grad_norm": 1.3673607492278097, "learning_rate": 6.11318212881217e-06, "loss": 0.6227, "step": 6963 }, { "epoch": 0.45, "grad_norm": 1.1269616261615627, "learning_rate": 6.112171565261594e-06, "loss": 0.6712, "step": 6964 }, { "epoch": 0.45, "grad_norm": 1.6960310166063983, "learning_rate": 6.111160953912222e-06, "loss": 0.6649, "step": 6965 }, { "epoch": 0.45, "grad_norm": 1.488844635954272, "learning_rate": 6.110150294807487e-06, "loss": 0.6268, "step": 6966 }, { "epoch": 0.45, "grad_norm": 1.4805209536045687, "learning_rate": 6.1091395879908255e-06, "loss": 0.7156, "step": 6967 }, { "epoch": 0.45, "grad_norm": 1.7537650836722791, "learning_rate": 6.108128833505675e-06, "loss": 0.6636, "step": 6968 }, { "epoch": 0.45, "grad_norm": 1.5046016799733457, "learning_rate": 6.107118031395475e-06, "loss": 0.7213, "step": 6969 }, { "epoch": 0.45, "grad_norm": 1.135063356946608, "learning_rate": 6.106107181703669e-06, "loss": 0.6359, "step": 6970 }, { "epoch": 0.45, "grad_norm": 1.7256967826231964, "learning_rate": 6.1050962844737005e-06, "loss": 0.6898, "step": 6971 }, { "epoch": 0.45, "grad_norm": 1.4178141521852308, "learning_rate": 6.104085339749015e-06, "loss": 0.6573, "step": 6972 }, { "epoch": 0.45, "grad_norm": 1.482882314625974, "learning_rate": 6.103074347573062e-06, "loss": 0.6826, "step": 6973 }, { "epoch": 0.45, "grad_norm": 1.6624505783441417, "learning_rate": 6.102063307989293e-06, "loss": 0.755, "step": 6974 }, { "epoch": 0.45, "grad_norm": 1.5516085305551979, "learning_rate": 6.1010522210411575e-06, "loss": 0.5867, "step": 6975 }, { "epoch": 0.45, "grad_norm": 2.6053116744833873, "learning_rate": 6.100041086772111e-06, "loss": 0.6448, "step": 6976 }, { "epoch": 0.45, "grad_norm": 1.7417498599354864, "learning_rate": 6.0990299052256105e-06, "loss": 0.7725, "step": 6977 }, { "epoch": 0.45, "grad_norm": 2.228940536870678, "learning_rate": 6.098018676445114e-06, "loss": 0.6984, "step": 6978 }, { "epoch": 0.45, "grad_norm": 1.5583084258530284, "learning_rate": 6.097007400474081e-06, "loss": 0.7045, "step": 6979 }, { "epoch": 0.45, "grad_norm": 1.7452606315064347, "learning_rate": 6.095996077355976e-06, "loss": 0.7225, "step": 6980 }, { "epoch": 0.45, "grad_norm": 1.3201473296326356, "learning_rate": 6.094984707134263e-06, "loss": 0.6961, "step": 6981 }, { "epoch": 0.45, "grad_norm": 1.0426040296749606, "learning_rate": 6.093973289852409e-06, "loss": 0.7137, "step": 6982 }, { "epoch": 0.45, "grad_norm": 1.6129412762307065, "learning_rate": 6.092961825553881e-06, "loss": 0.7868, "step": 6983 }, { "epoch": 0.45, "grad_norm": 1.5452233398430748, "learning_rate": 6.091950314282149e-06, "loss": 0.6489, "step": 6984 }, { "epoch": 0.45, "grad_norm": 1.6269960796004876, "learning_rate": 6.090938756080688e-06, "loss": 0.6519, "step": 6985 }, { "epoch": 0.45, "grad_norm": 1.6183187974207738, "learning_rate": 6.089927150992971e-06, "loss": 0.7627, "step": 6986 }, { "epoch": 0.45, "grad_norm": 1.5397819446980336, "learning_rate": 6.088915499062475e-06, "loss": 0.5845, "step": 6987 }, { "epoch": 0.45, "grad_norm": 1.7111308338096751, "learning_rate": 6.08790380033268e-06, "loss": 0.71, "step": 6988 }, { "epoch": 0.45, "grad_norm": 1.636978931345006, "learning_rate": 6.0868920548470654e-06, "loss": 0.7733, "step": 6989 }, { "epoch": 0.45, "grad_norm": 1.6740621531889444, "learning_rate": 6.0858802626491155e-06, "loss": 0.6882, "step": 6990 }, { "epoch": 0.45, "grad_norm": 1.3315232947206352, "learning_rate": 6.084868423782312e-06, "loss": 0.6226, "step": 6991 }, { "epoch": 0.45, "grad_norm": 1.5034105438346999, "learning_rate": 6.0838565382901435e-06, "loss": 0.6446, "step": 6992 }, { "epoch": 0.45, "grad_norm": 1.50034559976072, "learning_rate": 6.082844606216098e-06, "loss": 0.5929, "step": 6993 }, { "epoch": 0.45, "grad_norm": 1.9863089790723838, "learning_rate": 6.0818326276036675e-06, "loss": 0.7103, "step": 6994 }, { "epoch": 0.45, "grad_norm": 1.5567785416313953, "learning_rate": 6.080820602496345e-06, "loss": 0.6995, "step": 6995 }, { "epoch": 0.45, "grad_norm": 1.404841312598971, "learning_rate": 6.079808530937621e-06, "loss": 0.5984, "step": 6996 }, { "epoch": 0.45, "grad_norm": 1.9769351349365785, "learning_rate": 6.078796412970997e-06, "loss": 0.7573, "step": 6997 }, { "epoch": 0.45, "grad_norm": 1.4647718514962806, "learning_rate": 6.077784248639971e-06, "loss": 0.6564, "step": 6998 }, { "epoch": 0.45, "grad_norm": 1.1084643624283217, "learning_rate": 6.076772037988042e-06, "loss": 0.6615, "step": 6999 }, { "epoch": 0.45, "grad_norm": 1.6725382885609539, "learning_rate": 6.075759781058713e-06, "loss": 0.6937, "step": 7000 }, { "epoch": 0.45, "grad_norm": 1.115663674688307, "learning_rate": 6.07474747789549e-06, "loss": 0.6992, "step": 7001 }, { "epoch": 0.45, "grad_norm": 1.639463442434882, "learning_rate": 6.073735128541878e-06, "loss": 0.7687, "step": 7002 }, { "epoch": 0.45, "grad_norm": 1.5478143415937136, "learning_rate": 6.072722733041387e-06, "loss": 0.7757, "step": 7003 }, { "epoch": 0.45, "grad_norm": 1.62871421283813, "learning_rate": 6.071710291437527e-06, "loss": 0.721, "step": 7004 }, { "epoch": 0.45, "grad_norm": 1.471855796208435, "learning_rate": 6.07069780377381e-06, "loss": 0.6412, "step": 7005 }, { "epoch": 0.45, "grad_norm": 1.8803713357091825, "learning_rate": 6.069685270093751e-06, "loss": 0.8025, "step": 7006 }, { "epoch": 0.45, "grad_norm": 1.5723663332653042, "learning_rate": 6.068672690440868e-06, "loss": 0.6856, "step": 7007 }, { "epoch": 0.45, "grad_norm": 1.479536308513324, "learning_rate": 6.067660064858677e-06, "loss": 0.6726, "step": 7008 }, { "epoch": 0.45, "grad_norm": 1.579504673724132, "learning_rate": 6.066647393390701e-06, "loss": 0.6361, "step": 7009 }, { "epoch": 0.45, "grad_norm": 2.1595361418206696, "learning_rate": 6.0656346760804605e-06, "loss": 0.7291, "step": 7010 }, { "epoch": 0.45, "grad_norm": 1.4176070880162568, "learning_rate": 6.064621912971483e-06, "loss": 0.7698, "step": 7011 }, { "epoch": 0.45, "grad_norm": 1.4293711686761887, "learning_rate": 6.063609104107291e-06, "loss": 0.6505, "step": 7012 }, { "epoch": 0.45, "grad_norm": 1.449513072554987, "learning_rate": 6.062596249531414e-06, "loss": 0.7096, "step": 7013 }, { "epoch": 0.45, "grad_norm": 1.5277382958510652, "learning_rate": 6.061583349287383e-06, "loss": 0.6502, "step": 7014 }, { "epoch": 0.45, "grad_norm": 1.6346109191774316, "learning_rate": 6.060570403418731e-06, "loss": 0.6373, "step": 7015 }, { "epoch": 0.45, "grad_norm": 1.8565118037020087, "learning_rate": 6.0595574119689915e-06, "loss": 0.7309, "step": 7016 }, { "epoch": 0.45, "grad_norm": 1.9755928645676837, "learning_rate": 6.058544374981701e-06, "loss": 0.6592, "step": 7017 }, { "epoch": 0.45, "grad_norm": 1.727060217995175, "learning_rate": 6.057531292500398e-06, "loss": 0.6102, "step": 7018 }, { "epoch": 0.45, "grad_norm": 1.5706696730179173, "learning_rate": 6.056518164568622e-06, "loss": 0.723, "step": 7019 }, { "epoch": 0.45, "grad_norm": 1.1185293725264458, "learning_rate": 6.055504991229916e-06, "loss": 0.6957, "step": 7020 }, { "epoch": 0.45, "grad_norm": 1.6843915498164357, "learning_rate": 6.054491772527822e-06, "loss": 0.6666, "step": 7021 }, { "epoch": 0.45, "grad_norm": 1.5529150504474818, "learning_rate": 6.053478508505888e-06, "loss": 0.6618, "step": 7022 }, { "epoch": 0.45, "grad_norm": 1.0917361210953196, "learning_rate": 6.052465199207661e-06, "loss": 0.6648, "step": 7023 }, { "epoch": 0.45, "grad_norm": 1.009555439567183, "learning_rate": 6.051451844676691e-06, "loss": 0.6453, "step": 7024 }, { "epoch": 0.45, "grad_norm": 1.6503390494885772, "learning_rate": 6.050438444956531e-06, "loss": 0.6126, "step": 7025 }, { "epoch": 0.45, "grad_norm": 1.4570585145814021, "learning_rate": 6.049425000090734e-06, "loss": 0.7883, "step": 7026 }, { "epoch": 0.45, "grad_norm": 1.560104047571033, "learning_rate": 6.048411510122855e-06, "loss": 0.6359, "step": 7027 }, { "epoch": 0.45, "grad_norm": 1.4633186049076194, "learning_rate": 6.047397975096454e-06, "loss": 0.7363, "step": 7028 }, { "epoch": 0.45, "grad_norm": 1.4762944284252655, "learning_rate": 6.046384395055086e-06, "loss": 0.6886, "step": 7029 }, { "epoch": 0.45, "grad_norm": 1.4825660826757343, "learning_rate": 6.045370770042318e-06, "loss": 0.696, "step": 7030 }, { "epoch": 0.45, "grad_norm": 1.0329364663648175, "learning_rate": 6.04435710010171e-06, "loss": 0.6434, "step": 7031 }, { "epoch": 0.45, "grad_norm": 1.9102193103639686, "learning_rate": 6.0433433852768285e-06, "loss": 0.7172, "step": 7032 }, { "epoch": 0.45, "grad_norm": 1.5357369321882552, "learning_rate": 6.042329625611239e-06, "loss": 0.8529, "step": 7033 }, { "epoch": 0.45, "grad_norm": 1.5032571398911239, "learning_rate": 6.041315821148514e-06, "loss": 0.6037, "step": 7034 }, { "epoch": 0.45, "grad_norm": 1.2752048882425884, "learning_rate": 6.040301971932223e-06, "loss": 0.6833, "step": 7035 }, { "epoch": 0.45, "grad_norm": 1.586620895561508, "learning_rate": 6.0392880780059395e-06, "loss": 0.6859, "step": 7036 }, { "epoch": 0.45, "grad_norm": 1.5979716187421742, "learning_rate": 6.038274139413238e-06, "loss": 0.6463, "step": 7037 }, { "epoch": 0.45, "grad_norm": 1.762370397097247, "learning_rate": 6.0372601561976955e-06, "loss": 0.6754, "step": 7038 }, { "epoch": 0.45, "grad_norm": 1.4831894118599538, "learning_rate": 6.036246128402892e-06, "loss": 0.7671, "step": 7039 }, { "epoch": 0.45, "grad_norm": 1.4936291775924904, "learning_rate": 6.0352320560724066e-06, "loss": 0.7216, "step": 7040 }, { "epoch": 0.45, "grad_norm": 1.1418382081736564, "learning_rate": 6.034217939249823e-06, "loss": 0.6991, "step": 7041 }, { "epoch": 0.45, "grad_norm": 1.5421430282443092, "learning_rate": 6.033203777978724e-06, "loss": 0.7044, "step": 7042 }, { "epoch": 0.45, "grad_norm": 1.2026700749533774, "learning_rate": 6.0321895723027e-06, "loss": 0.6823, "step": 7043 }, { "epoch": 0.45, "grad_norm": 1.633458536892859, "learning_rate": 6.031175322265335e-06, "loss": 0.7421, "step": 7044 }, { "epoch": 0.45, "grad_norm": 1.253906654890762, "learning_rate": 6.030161027910223e-06, "loss": 0.632, "step": 7045 }, { "epoch": 0.45, "grad_norm": 1.1965696956718153, "learning_rate": 6.029146689280954e-06, "loss": 0.6544, "step": 7046 }, { "epoch": 0.45, "grad_norm": 1.6715343523149309, "learning_rate": 6.028132306421124e-06, "loss": 0.7466, "step": 7047 }, { "epoch": 0.45, "grad_norm": 1.5987360338270429, "learning_rate": 6.027117879374327e-06, "loss": 0.6678, "step": 7048 }, { "epoch": 0.45, "grad_norm": 2.1160663689160772, "learning_rate": 6.026103408184162e-06, "loss": 0.7205, "step": 7049 }, { "epoch": 0.45, "grad_norm": 1.535943859784092, "learning_rate": 6.025088892894227e-06, "loss": 0.6835, "step": 7050 }, { "epoch": 0.45, "grad_norm": 1.5718586231729585, "learning_rate": 6.0240743335481265e-06, "loss": 0.6873, "step": 7051 }, { "epoch": 0.45, "grad_norm": 1.5810054091356345, "learning_rate": 6.023059730189464e-06, "loss": 0.7052, "step": 7052 }, { "epoch": 0.45, "grad_norm": 1.7343600143779108, "learning_rate": 6.0220450828618424e-06, "loss": 0.7719, "step": 7053 }, { "epoch": 0.45, "grad_norm": 1.3749934181818344, "learning_rate": 6.021030391608872e-06, "loss": 0.6467, "step": 7054 }, { "epoch": 0.45, "grad_norm": 1.4020982480713458, "learning_rate": 6.0200156564741606e-06, "loss": 0.6899, "step": 7055 }, { "epoch": 0.45, "grad_norm": 1.0716735813374496, "learning_rate": 6.019000877501321e-06, "loss": 0.6758, "step": 7056 }, { "epoch": 0.45, "grad_norm": 1.8448945503851373, "learning_rate": 6.017986054733962e-06, "loss": 0.8699, "step": 7057 }, { "epoch": 0.45, "grad_norm": 2.2646555481745128, "learning_rate": 6.016971188215703e-06, "loss": 0.6932, "step": 7058 }, { "epoch": 0.45, "grad_norm": 1.4869715524604274, "learning_rate": 6.0159562779901605e-06, "loss": 0.7101, "step": 7059 }, { "epoch": 0.45, "grad_norm": 1.2243707950689926, "learning_rate": 6.0149413241009504e-06, "loss": 0.7071, "step": 7060 }, { "epoch": 0.45, "grad_norm": 1.2426419613149968, "learning_rate": 6.013926326591695e-06, "loss": 0.5924, "step": 7061 }, { "epoch": 0.45, "grad_norm": 2.0498857238548234, "learning_rate": 6.012911285506016e-06, "loss": 0.6318, "step": 7062 }, { "epoch": 0.45, "grad_norm": 0.9884151303009374, "learning_rate": 6.0118962008875395e-06, "loss": 0.6555, "step": 7063 }, { "epoch": 0.45, "grad_norm": 1.427621550695856, "learning_rate": 6.010881072779891e-06, "loss": 0.6021, "step": 7064 }, { "epoch": 0.45, "grad_norm": 0.9317424549189381, "learning_rate": 6.009865901226697e-06, "loss": 0.568, "step": 7065 }, { "epoch": 0.45, "grad_norm": 1.5729668021241512, "learning_rate": 6.008850686271589e-06, "loss": 0.708, "step": 7066 }, { "epoch": 0.45, "grad_norm": 1.74214866341376, "learning_rate": 6.007835427958199e-06, "loss": 0.7069, "step": 7067 }, { "epoch": 0.45, "grad_norm": 1.4410141739146127, "learning_rate": 6.006820126330159e-06, "loss": 0.6499, "step": 7068 }, { "epoch": 0.45, "grad_norm": 1.6258788742985857, "learning_rate": 6.005804781431106e-06, "loss": 0.7116, "step": 7069 }, { "epoch": 0.45, "grad_norm": 1.549065100638932, "learning_rate": 6.0047893933046765e-06, "loss": 0.6672, "step": 7070 }, { "epoch": 0.45, "grad_norm": 1.5328085365018174, "learning_rate": 6.0037739619945114e-06, "loss": 0.6192, "step": 7071 }, { "epoch": 0.45, "grad_norm": 1.5064896150350358, "learning_rate": 6.002758487544249e-06, "loss": 0.6697, "step": 7072 }, { "epoch": 0.45, "grad_norm": 1.4605540392402467, "learning_rate": 6.001742969997535e-06, "loss": 0.7418, "step": 7073 }, { "epoch": 0.45, "grad_norm": 1.6184475345043885, "learning_rate": 6.000727409398013e-06, "loss": 0.6803, "step": 7074 }, { "epoch": 0.45, "grad_norm": 1.6156627923568412, "learning_rate": 5.99971180578933e-06, "loss": 0.6956, "step": 7075 }, { "epoch": 0.45, "grad_norm": 1.2565386148290278, "learning_rate": 5.998696159215134e-06, "loss": 0.7679, "step": 7076 }, { "epoch": 0.45, "grad_norm": 1.5456353300141006, "learning_rate": 5.997680469719076e-06, "loss": 0.6106, "step": 7077 }, { "epoch": 0.45, "grad_norm": 1.5705401603528457, "learning_rate": 5.996664737344808e-06, "loss": 0.6485, "step": 7078 }, { "epoch": 0.45, "grad_norm": 1.5092022595514887, "learning_rate": 5.995648962135983e-06, "loss": 0.7425, "step": 7079 }, { "epoch": 0.45, "grad_norm": 1.4610283138127138, "learning_rate": 5.994633144136257e-06, "loss": 0.7634, "step": 7080 }, { "epoch": 0.45, "grad_norm": 1.5713106604271072, "learning_rate": 5.993617283389289e-06, "loss": 0.6688, "step": 7081 }, { "epoch": 0.45, "grad_norm": 1.4322370352059395, "learning_rate": 5.9926013799387396e-06, "loss": 0.7009, "step": 7082 }, { "epoch": 0.45, "grad_norm": 1.4016701073482152, "learning_rate": 5.991585433828267e-06, "loss": 0.6562, "step": 7083 }, { "epoch": 0.45, "grad_norm": 1.5870654914906444, "learning_rate": 5.990569445101537e-06, "loss": 0.6916, "step": 7084 }, { "epoch": 0.45, "grad_norm": 1.4937751960716987, "learning_rate": 5.9895534138022136e-06, "loss": 0.6168, "step": 7085 }, { "epoch": 0.45, "grad_norm": 1.5936410286463014, "learning_rate": 5.988537339973963e-06, "loss": 0.7062, "step": 7086 }, { "epoch": 0.45, "grad_norm": 1.627414199632036, "learning_rate": 5.9875212236604564e-06, "loss": 0.7535, "step": 7087 }, { "epoch": 0.45, "grad_norm": 1.506685218492741, "learning_rate": 5.986505064905361e-06, "loss": 0.6376, "step": 7088 }, { "epoch": 0.45, "grad_norm": 1.6237011270958301, "learning_rate": 5.985488863752351e-06, "loss": 0.7079, "step": 7089 }, { "epoch": 0.45, "grad_norm": 1.3930666380909993, "learning_rate": 5.984472620245101e-06, "loss": 0.6997, "step": 7090 }, { "epoch": 0.45, "grad_norm": 1.6073334264473889, "learning_rate": 5.983456334427286e-06, "loss": 0.7751, "step": 7091 }, { "epoch": 0.45, "grad_norm": 1.7843057005783813, "learning_rate": 5.982440006342586e-06, "loss": 0.7243, "step": 7092 }, { "epoch": 0.45, "grad_norm": 1.421720987025508, "learning_rate": 5.9814236360346765e-06, "loss": 0.7659, "step": 7093 }, { "epoch": 0.45, "grad_norm": 1.3746410087113072, "learning_rate": 5.980407223547243e-06, "loss": 0.6829, "step": 7094 }, { "epoch": 0.45, "grad_norm": 1.7056604610632695, "learning_rate": 5.9793907689239675e-06, "loss": 0.6328, "step": 7095 }, { "epoch": 0.45, "grad_norm": 1.5495189452936802, "learning_rate": 5.978374272208534e-06, "loss": 0.6088, "step": 7096 }, { "epoch": 0.45, "grad_norm": 1.5881449884590912, "learning_rate": 5.97735773344463e-06, "loss": 0.6545, "step": 7097 }, { "epoch": 0.45, "grad_norm": 1.5758637778678868, "learning_rate": 5.976341152675943e-06, "loss": 0.7667, "step": 7098 }, { "epoch": 0.45, "grad_norm": 1.804400426830751, "learning_rate": 5.975324529946166e-06, "loss": 0.6757, "step": 7099 }, { "epoch": 0.45, "grad_norm": 1.207439826091522, "learning_rate": 5.9743078652989905e-06, "loss": 0.7737, "step": 7100 }, { "epoch": 0.45, "grad_norm": 1.8831525600367256, "learning_rate": 5.973291158778109e-06, "loss": 0.7199, "step": 7101 }, { "epoch": 0.45, "grad_norm": 1.4541011234847128, "learning_rate": 5.97227441042722e-06, "loss": 0.6825, "step": 7102 }, { "epoch": 0.45, "grad_norm": 1.3490105645767916, "learning_rate": 5.97125762029002e-06, "loss": 0.7547, "step": 7103 }, { "epoch": 0.45, "grad_norm": 1.7594841585655023, "learning_rate": 5.970240788410209e-06, "loss": 0.6966, "step": 7104 }, { "epoch": 0.45, "grad_norm": 1.4494055645639574, "learning_rate": 5.969223914831485e-06, "loss": 0.7232, "step": 7105 }, { "epoch": 0.45, "grad_norm": 1.5504515258285543, "learning_rate": 5.968206999597557e-06, "loss": 0.6065, "step": 7106 }, { "epoch": 0.45, "grad_norm": 1.1837975264139071, "learning_rate": 5.967190042752123e-06, "loss": 0.5714, "step": 7107 }, { "epoch": 0.45, "grad_norm": 1.7462313592297014, "learning_rate": 5.966173044338895e-06, "loss": 0.726, "step": 7108 }, { "epoch": 0.46, "grad_norm": 1.256097943936462, "learning_rate": 5.965156004401581e-06, "loss": 0.6674, "step": 7109 }, { "epoch": 0.46, "grad_norm": 2.17281968938, "learning_rate": 5.964138922983889e-06, "loss": 0.7939, "step": 7110 }, { "epoch": 0.46, "grad_norm": 1.5346771853721697, "learning_rate": 5.9631218001295325e-06, "loss": 0.7167, "step": 7111 }, { "epoch": 0.46, "grad_norm": 1.3624370295071602, "learning_rate": 5.962104635882225e-06, "loss": 0.6683, "step": 7112 }, { "epoch": 0.46, "grad_norm": 1.9279290172352692, "learning_rate": 5.961087430285681e-06, "loss": 0.672, "step": 7113 }, { "epoch": 0.46, "grad_norm": 1.4936449890789996, "learning_rate": 5.9600701833836185e-06, "loss": 0.7316, "step": 7114 }, { "epoch": 0.46, "grad_norm": 1.5146329422249871, "learning_rate": 5.959052895219758e-06, "loss": 0.6637, "step": 7115 }, { "epoch": 0.46, "grad_norm": 1.479386438924537, "learning_rate": 5.958035565837819e-06, "loss": 0.7986, "step": 7116 }, { "epoch": 0.46, "grad_norm": 1.5522372738072365, "learning_rate": 5.957018195281523e-06, "loss": 0.6875, "step": 7117 }, { "epoch": 0.46, "grad_norm": 1.4368498856594512, "learning_rate": 5.956000783594598e-06, "loss": 0.7526, "step": 7118 }, { "epoch": 0.46, "grad_norm": 1.45185808032261, "learning_rate": 5.954983330820767e-06, "loss": 0.6336, "step": 7119 }, { "epoch": 0.46, "grad_norm": 1.5736466362910673, "learning_rate": 5.95396583700376e-06, "loss": 0.7355, "step": 7120 }, { "epoch": 0.46, "grad_norm": 1.7377136071891632, "learning_rate": 5.9529483021873055e-06, "loss": 0.7428, "step": 7121 }, { "epoch": 0.46, "grad_norm": 1.6297344967140786, "learning_rate": 5.951930726415135e-06, "loss": 0.742, "step": 7122 }, { "epoch": 0.46, "grad_norm": 1.8173581657895295, "learning_rate": 5.950913109730983e-06, "loss": 0.6783, "step": 7123 }, { "epoch": 0.46, "grad_norm": 1.7105103649512525, "learning_rate": 5.949895452178582e-06, "loss": 0.6954, "step": 7124 }, { "epoch": 0.46, "grad_norm": 1.6136156024388104, "learning_rate": 5.948877753801673e-06, "loss": 0.6873, "step": 7125 }, { "epoch": 0.46, "grad_norm": 1.4962994232088414, "learning_rate": 5.947860014643989e-06, "loss": 0.5938, "step": 7126 }, { "epoch": 0.46, "grad_norm": 1.6378250263183076, "learning_rate": 5.946842234749275e-06, "loss": 0.6709, "step": 7127 }, { "epoch": 0.46, "grad_norm": 1.758547272051296, "learning_rate": 5.945824414161272e-06, "loss": 0.6867, "step": 7128 }, { "epoch": 0.46, "grad_norm": 1.7434176779533501, "learning_rate": 5.944806552923722e-06, "loss": 0.7057, "step": 7129 }, { "epoch": 0.46, "grad_norm": 1.9333189993065474, "learning_rate": 5.943788651080372e-06, "loss": 0.8217, "step": 7130 }, { "epoch": 0.46, "grad_norm": 1.5540890680065755, "learning_rate": 5.942770708674969e-06, "loss": 0.6337, "step": 7131 }, { "epoch": 0.46, "grad_norm": 1.4619731384159276, "learning_rate": 5.941752725751262e-06, "loss": 0.6962, "step": 7132 }, { "epoch": 0.46, "grad_norm": 1.5736139252842398, "learning_rate": 5.940734702353002e-06, "loss": 0.5959, "step": 7133 }, { "epoch": 0.46, "grad_norm": 1.6776920439484395, "learning_rate": 5.939716638523941e-06, "loss": 0.7415, "step": 7134 }, { "epoch": 0.46, "grad_norm": 1.2972687791102433, "learning_rate": 5.938698534307833e-06, "loss": 0.761, "step": 7135 }, { "epoch": 0.46, "grad_norm": 0.986963678535253, "learning_rate": 5.937680389748436e-06, "loss": 0.6997, "step": 7136 }, { "epoch": 0.46, "grad_norm": 1.7482856104824933, "learning_rate": 5.936662204889504e-06, "loss": 0.6235, "step": 7137 }, { "epoch": 0.46, "grad_norm": 1.3502540871206294, "learning_rate": 5.9356439797748e-06, "loss": 0.7407, "step": 7138 }, { "epoch": 0.46, "grad_norm": 1.944774088780583, "learning_rate": 5.934625714448084e-06, "loss": 0.6902, "step": 7139 }, { "epoch": 0.46, "grad_norm": 1.464706852489367, "learning_rate": 5.933607408953118e-06, "loss": 0.7214, "step": 7140 }, { "epoch": 0.46, "grad_norm": 1.4690253843219823, "learning_rate": 5.932589063333668e-06, "loss": 0.7114, "step": 7141 }, { "epoch": 0.46, "grad_norm": 1.7333849180690102, "learning_rate": 5.9315706776335005e-06, "loss": 0.7288, "step": 7142 }, { "epoch": 0.46, "grad_norm": 1.5137002924202543, "learning_rate": 5.9305522518963795e-06, "loss": 0.6763, "step": 7143 }, { "epoch": 0.46, "grad_norm": 2.045404456575576, "learning_rate": 5.9295337861660795e-06, "loss": 0.7589, "step": 7144 }, { "epoch": 0.46, "grad_norm": 1.5540976064427008, "learning_rate": 5.928515280486372e-06, "loss": 0.6914, "step": 7145 }, { "epoch": 0.46, "grad_norm": 1.3456686233174262, "learning_rate": 5.9274967349010286e-06, "loss": 0.6688, "step": 7146 }, { "epoch": 0.46, "grad_norm": 1.2134677193104562, "learning_rate": 5.9264781494538235e-06, "loss": 0.6717, "step": 7147 }, { "epoch": 0.46, "grad_norm": 1.5466947388496521, "learning_rate": 5.925459524188535e-06, "loss": 0.6999, "step": 7148 }, { "epoch": 0.46, "grad_norm": 1.3423394671351285, "learning_rate": 5.924440859148941e-06, "loss": 0.6144, "step": 7149 }, { "epoch": 0.46, "grad_norm": 1.6881732002784302, "learning_rate": 5.923422154378821e-06, "loss": 0.7034, "step": 7150 }, { "epoch": 0.46, "grad_norm": 1.0771313460417706, "learning_rate": 5.922403409921957e-06, "loss": 0.6303, "step": 7151 }, { "epoch": 0.46, "grad_norm": 1.5277993872189848, "learning_rate": 5.921384625822133e-06, "loss": 0.6346, "step": 7152 }, { "epoch": 0.46, "grad_norm": 1.1244300942229604, "learning_rate": 5.9203658021231335e-06, "loss": 0.7138, "step": 7153 }, { "epoch": 0.46, "grad_norm": 1.5628635598441487, "learning_rate": 5.919346938868745e-06, "loss": 0.6334, "step": 7154 }, { "epoch": 0.46, "grad_norm": 1.6235475926029639, "learning_rate": 5.918328036102758e-06, "loss": 0.7103, "step": 7155 }, { "epoch": 0.46, "grad_norm": 1.593885119334622, "learning_rate": 5.9173090938689626e-06, "loss": 0.8256, "step": 7156 }, { "epoch": 0.46, "grad_norm": 1.6361093861595049, "learning_rate": 5.916290112211149e-06, "loss": 0.7254, "step": 7157 }, { "epoch": 0.46, "grad_norm": 1.735975618657347, "learning_rate": 5.91527109117311e-06, "loss": 0.7069, "step": 7158 }, { "epoch": 0.46, "grad_norm": 1.6499895652537675, "learning_rate": 5.9142520307986455e-06, "loss": 0.6561, "step": 7159 }, { "epoch": 0.46, "grad_norm": 1.5135609689347413, "learning_rate": 5.91323293113155e-06, "loss": 0.7536, "step": 7160 }, { "epoch": 0.46, "grad_norm": 1.478749715478211, "learning_rate": 5.91221379221562e-06, "loss": 0.7397, "step": 7161 }, { "epoch": 0.46, "grad_norm": 1.4908282350941913, "learning_rate": 5.91119461409466e-06, "loss": 0.7327, "step": 7162 }, { "epoch": 0.46, "grad_norm": 1.5329027743531007, "learning_rate": 5.910175396812468e-06, "loss": 0.6506, "step": 7163 }, { "epoch": 0.46, "grad_norm": 1.5603448471547392, "learning_rate": 5.9091561404128505e-06, "loss": 0.7077, "step": 7164 }, { "epoch": 0.46, "grad_norm": 1.7169728231035613, "learning_rate": 5.908136844939612e-06, "loss": 0.6485, "step": 7165 }, { "epoch": 0.46, "grad_norm": 1.6179385629776304, "learning_rate": 5.9071175104365616e-06, "loss": 0.6641, "step": 7166 }, { "epoch": 0.46, "grad_norm": 1.5602515969602213, "learning_rate": 5.906098136947506e-06, "loss": 0.7996, "step": 7167 }, { "epoch": 0.46, "grad_norm": 1.343052152271802, "learning_rate": 5.905078724516258e-06, "loss": 0.751, "step": 7168 }, { "epoch": 0.46, "grad_norm": 1.4432540124498217, "learning_rate": 5.904059273186627e-06, "loss": 0.6589, "step": 7169 }, { "epoch": 0.46, "grad_norm": 1.5888531287908507, "learning_rate": 5.903039783002428e-06, "loss": 0.7867, "step": 7170 }, { "epoch": 0.46, "grad_norm": 1.569737273125768, "learning_rate": 5.9020202540074755e-06, "loss": 0.7012, "step": 7171 }, { "epoch": 0.46, "grad_norm": 1.5457142908324417, "learning_rate": 5.901000686245588e-06, "loss": 0.6024, "step": 7172 }, { "epoch": 0.46, "grad_norm": 3.7769814872604197, "learning_rate": 5.899981079760586e-06, "loss": 0.7222, "step": 7173 }, { "epoch": 0.46, "grad_norm": 1.7173274170690587, "learning_rate": 5.898961434596289e-06, "loss": 0.6645, "step": 7174 }, { "epoch": 0.46, "grad_norm": 1.5440018587472828, "learning_rate": 5.897941750796517e-06, "loss": 0.7503, "step": 7175 }, { "epoch": 0.46, "grad_norm": 1.4890935500273454, "learning_rate": 5.896922028405095e-06, "loss": 0.7646, "step": 7176 }, { "epoch": 0.46, "grad_norm": 2.1239695975396007, "learning_rate": 5.895902267465851e-06, "loss": 0.6488, "step": 7177 }, { "epoch": 0.46, "grad_norm": 1.655936871422997, "learning_rate": 5.894882468022608e-06, "loss": 0.6745, "step": 7178 }, { "epoch": 0.46, "grad_norm": 1.699625128993268, "learning_rate": 5.893862630119197e-06, "loss": 0.7466, "step": 7179 }, { "epoch": 0.46, "grad_norm": 1.4728442086254805, "learning_rate": 5.892842753799449e-06, "loss": 0.6468, "step": 7180 }, { "epoch": 0.46, "grad_norm": 1.5887391477766182, "learning_rate": 5.891822839107195e-06, "loss": 0.6642, "step": 7181 }, { "epoch": 0.46, "grad_norm": 1.0130089829824633, "learning_rate": 5.8908028860862695e-06, "loss": 0.6249, "step": 7182 }, { "epoch": 0.46, "grad_norm": 2.040494173472763, "learning_rate": 5.8897828947805094e-06, "loss": 0.7832, "step": 7183 }, { "epoch": 0.46, "grad_norm": 3.059265557499835, "learning_rate": 5.8887628652337495e-06, "loss": 0.5736, "step": 7184 }, { "epoch": 0.46, "grad_norm": 1.1823719698931234, "learning_rate": 5.887742797489828e-06, "loss": 0.6278, "step": 7185 }, { "epoch": 0.46, "grad_norm": 1.5700902940641843, "learning_rate": 5.886722691592587e-06, "loss": 0.7113, "step": 7186 }, { "epoch": 0.46, "grad_norm": 1.4983292121555, "learning_rate": 5.8857025475858676e-06, "loss": 0.6356, "step": 7187 }, { "epoch": 0.46, "grad_norm": 1.4841310498358151, "learning_rate": 5.8846823655135155e-06, "loss": 0.7019, "step": 7188 }, { "epoch": 0.46, "grad_norm": 1.6154148863331674, "learning_rate": 5.883662145419373e-06, "loss": 0.6918, "step": 7189 }, { "epoch": 0.46, "grad_norm": 1.066187419812095, "learning_rate": 5.882641887347289e-06, "loss": 0.5948, "step": 7190 }, { "epoch": 0.46, "grad_norm": 1.0930051912650938, "learning_rate": 5.881621591341109e-06, "loss": 0.6454, "step": 7191 }, { "epoch": 0.46, "grad_norm": 1.2759406171680838, "learning_rate": 5.880601257444688e-06, "loss": 0.5814, "step": 7192 }, { "epoch": 0.46, "grad_norm": 1.7907380127442967, "learning_rate": 5.879580885701874e-06, "loss": 0.6486, "step": 7193 }, { "epoch": 0.46, "grad_norm": 1.5770867333850298, "learning_rate": 5.878560476156523e-06, "loss": 0.7382, "step": 7194 }, { "epoch": 0.46, "grad_norm": 1.9079239232224194, "learning_rate": 5.877540028852489e-06, "loss": 0.7429, "step": 7195 }, { "epoch": 0.46, "grad_norm": 1.2719048618404554, "learning_rate": 5.876519543833628e-06, "loss": 0.7071, "step": 7196 }, { "epoch": 0.46, "grad_norm": 1.7197707364061254, "learning_rate": 5.875499021143799e-06, "loss": 0.6039, "step": 7197 }, { "epoch": 0.46, "grad_norm": 1.4692847078435103, "learning_rate": 5.874478460826861e-06, "loss": 0.6796, "step": 7198 }, { "epoch": 0.46, "grad_norm": 1.0927781938586818, "learning_rate": 5.873457862926677e-06, "loss": 0.6212, "step": 7199 }, { "epoch": 0.46, "grad_norm": 1.5755218210652961, "learning_rate": 5.872437227487109e-06, "loss": 0.717, "step": 7200 }, { "epoch": 0.46, "grad_norm": 0.8907233022764962, "learning_rate": 5.871416554552021e-06, "loss": 0.5997, "step": 7201 }, { "epoch": 0.46, "grad_norm": 1.586489724414523, "learning_rate": 5.870395844165282e-06, "loss": 0.7446, "step": 7202 }, { "epoch": 0.46, "grad_norm": 1.9077600008822864, "learning_rate": 5.869375096370759e-06, "loss": 0.6816, "step": 7203 }, { "epoch": 0.46, "grad_norm": 1.6959701668327134, "learning_rate": 5.868354311212321e-06, "loss": 0.7388, "step": 7204 }, { "epoch": 0.46, "grad_norm": 1.4551845982082974, "learning_rate": 5.86733348873384e-06, "loss": 0.5936, "step": 7205 }, { "epoch": 0.46, "grad_norm": 1.430087813867723, "learning_rate": 5.866312628979188e-06, "loss": 0.5815, "step": 7206 }, { "epoch": 0.46, "grad_norm": 1.5963401821608363, "learning_rate": 5.8652917319922374e-06, "loss": 0.7485, "step": 7207 }, { "epoch": 0.46, "grad_norm": 1.4665861490953522, "learning_rate": 5.864270797816868e-06, "loss": 0.6931, "step": 7208 }, { "epoch": 0.46, "grad_norm": 1.64275589351291, "learning_rate": 5.863249826496955e-06, "loss": 0.6777, "step": 7209 }, { "epoch": 0.46, "grad_norm": 1.4240545646809741, "learning_rate": 5.862228818076378e-06, "loss": 0.6863, "step": 7210 }, { "epoch": 0.46, "grad_norm": 2.5241199847097735, "learning_rate": 5.8612077725990206e-06, "loss": 0.7346, "step": 7211 }, { "epoch": 0.46, "grad_norm": 1.476745769569888, "learning_rate": 5.860186690108762e-06, "loss": 0.7813, "step": 7212 }, { "epoch": 0.46, "grad_norm": 1.7421963756430707, "learning_rate": 5.859165570649485e-06, "loss": 0.5881, "step": 7213 }, { "epoch": 0.46, "grad_norm": 1.1165206733890585, "learning_rate": 5.858144414265079e-06, "loss": 0.6346, "step": 7214 }, { "epoch": 0.46, "grad_norm": 1.508572726436001, "learning_rate": 5.857123220999429e-06, "loss": 0.6033, "step": 7215 }, { "epoch": 0.46, "grad_norm": 1.7814182936645757, "learning_rate": 5.856101990896424e-06, "loss": 0.7632, "step": 7216 }, { "epoch": 0.46, "grad_norm": 1.7494059787823926, "learning_rate": 5.855080723999954e-06, "loss": 0.701, "step": 7217 }, { "epoch": 0.46, "grad_norm": 1.3115153881889707, "learning_rate": 5.85405942035391e-06, "loss": 0.6272, "step": 7218 }, { "epoch": 0.46, "grad_norm": 1.4535644157043648, "learning_rate": 5.853038080002189e-06, "loss": 0.6725, "step": 7219 }, { "epoch": 0.46, "grad_norm": 1.4084903421867183, "learning_rate": 5.852016702988683e-06, "loss": 0.6356, "step": 7220 }, { "epoch": 0.46, "grad_norm": 1.3194023345880297, "learning_rate": 5.85099528935729e-06, "loss": 0.6497, "step": 7221 }, { "epoch": 0.46, "grad_norm": 1.5123353033964775, "learning_rate": 5.849973839151906e-06, "loss": 0.7592, "step": 7222 }, { "epoch": 0.46, "grad_norm": 1.6128871812308572, "learning_rate": 5.848952352416434e-06, "loss": 0.7102, "step": 7223 }, { "epoch": 0.46, "grad_norm": 1.1145427814868367, "learning_rate": 5.847930829194773e-06, "loss": 0.6796, "step": 7224 }, { "epoch": 0.46, "grad_norm": 1.6881039186500353, "learning_rate": 5.8469092695308274e-06, "loss": 0.6357, "step": 7225 }, { "epoch": 0.46, "grad_norm": 1.7861777909713135, "learning_rate": 5.845887673468501e-06, "loss": 0.7491, "step": 7226 }, { "epoch": 0.46, "grad_norm": 1.678313868912561, "learning_rate": 5.844866041051699e-06, "loss": 0.6308, "step": 7227 }, { "epoch": 0.46, "grad_norm": 1.7527042013575995, "learning_rate": 5.84384437232433e-06, "loss": 0.6637, "step": 7228 }, { "epoch": 0.46, "grad_norm": 1.6311930350292596, "learning_rate": 5.8428226673303026e-06, "loss": 0.7459, "step": 7229 }, { "epoch": 0.46, "grad_norm": 1.6697949011369797, "learning_rate": 5.8418009261135286e-06, "loss": 0.6694, "step": 7230 }, { "epoch": 0.46, "grad_norm": 1.7185792144591552, "learning_rate": 5.84077914871792e-06, "loss": 0.6869, "step": 7231 }, { "epoch": 0.46, "grad_norm": 1.5586143814474474, "learning_rate": 5.83975733518739e-06, "loss": 0.64, "step": 7232 }, { "epoch": 0.46, "grad_norm": 1.5908355401790732, "learning_rate": 5.838735485565855e-06, "loss": 0.6837, "step": 7233 }, { "epoch": 0.46, "grad_norm": 1.5726200381300455, "learning_rate": 5.83771359989723e-06, "loss": 0.6808, "step": 7234 }, { "epoch": 0.46, "grad_norm": 1.6071804539063703, "learning_rate": 5.8366916782254345e-06, "loss": 0.7621, "step": 7235 }, { "epoch": 0.46, "grad_norm": 1.5353205147535662, "learning_rate": 5.83566972059439e-06, "loss": 0.7054, "step": 7236 }, { "epoch": 0.46, "grad_norm": 1.190844750316076, "learning_rate": 5.834647727048016e-06, "loss": 0.6585, "step": 7237 }, { "epoch": 0.46, "grad_norm": 1.5852453540509819, "learning_rate": 5.833625697630237e-06, "loss": 0.745, "step": 7238 }, { "epoch": 0.46, "grad_norm": 1.6319839300207097, "learning_rate": 5.832603632384978e-06, "loss": 0.6813, "step": 7239 }, { "epoch": 0.46, "grad_norm": 1.588618204234237, "learning_rate": 5.831581531356164e-06, "loss": 0.6724, "step": 7240 }, { "epoch": 0.46, "grad_norm": 1.4626965496186426, "learning_rate": 5.8305593945877236e-06, "loss": 0.6391, "step": 7241 }, { "epoch": 0.46, "grad_norm": 1.4955915213777722, "learning_rate": 5.829537222123585e-06, "loss": 0.6852, "step": 7242 }, { "epoch": 0.46, "grad_norm": 1.6430172631031785, "learning_rate": 5.828515014007678e-06, "loss": 0.6322, "step": 7243 }, { "epoch": 0.46, "grad_norm": 1.2748218788436898, "learning_rate": 5.827492770283939e-06, "loss": 0.6118, "step": 7244 }, { "epoch": 0.46, "grad_norm": 1.4132010349798416, "learning_rate": 5.826470490996299e-06, "loss": 0.7399, "step": 7245 }, { "epoch": 0.46, "grad_norm": 1.2859714033113432, "learning_rate": 5.825448176188693e-06, "loss": 0.6544, "step": 7246 }, { "epoch": 0.46, "grad_norm": 1.5510091472332292, "learning_rate": 5.82442582590506e-06, "loss": 0.6251, "step": 7247 }, { "epoch": 0.46, "grad_norm": 1.3980069446358465, "learning_rate": 5.823403440189337e-06, "loss": 0.6771, "step": 7248 }, { "epoch": 0.46, "grad_norm": 1.6079379488695784, "learning_rate": 5.822381019085466e-06, "loss": 0.6255, "step": 7249 }, { "epoch": 0.46, "grad_norm": 1.6440586765201095, "learning_rate": 5.821358562637384e-06, "loss": 0.6721, "step": 7250 }, { "epoch": 0.46, "grad_norm": 1.5966030646179272, "learning_rate": 5.820336070889038e-06, "loss": 0.7175, "step": 7251 }, { "epoch": 0.46, "grad_norm": 1.5277690398961137, "learning_rate": 5.819313543884372e-06, "loss": 0.8603, "step": 7252 }, { "epoch": 0.46, "grad_norm": 1.6035254388730897, "learning_rate": 5.8182909816673316e-06, "loss": 0.6509, "step": 7253 }, { "epoch": 0.46, "grad_norm": 1.650729676698241, "learning_rate": 5.817268384281864e-06, "loss": 0.6414, "step": 7254 }, { "epoch": 0.46, "grad_norm": 1.529427605947209, "learning_rate": 5.816245751771917e-06, "loss": 0.6494, "step": 7255 }, { "epoch": 0.46, "grad_norm": 1.0851211224010424, "learning_rate": 5.815223084181444e-06, "loss": 0.5661, "step": 7256 }, { "epoch": 0.46, "grad_norm": 1.6543865232620427, "learning_rate": 5.814200381554397e-06, "loss": 0.6012, "step": 7257 }, { "epoch": 0.46, "grad_norm": 1.6091230038309474, "learning_rate": 5.813177643934726e-06, "loss": 0.7504, "step": 7258 }, { "epoch": 0.46, "grad_norm": 1.5564558115102332, "learning_rate": 5.81215487136639e-06, "loss": 0.7792, "step": 7259 }, { "epoch": 0.46, "grad_norm": 1.6837483359714256, "learning_rate": 5.8111320638933446e-06, "loss": 0.8049, "step": 7260 }, { "epoch": 0.46, "grad_norm": 1.3440557537468658, "learning_rate": 5.810109221559548e-06, "loss": 0.6512, "step": 7261 }, { "epoch": 0.46, "grad_norm": 1.5546075625842541, "learning_rate": 5.809086344408958e-06, "loss": 0.6064, "step": 7262 }, { "epoch": 0.46, "grad_norm": 1.6238961201870534, "learning_rate": 5.808063432485538e-06, "loss": 0.7192, "step": 7263 }, { "epoch": 0.46, "grad_norm": 1.7125607445595186, "learning_rate": 5.807040485833248e-06, "loss": 0.6271, "step": 7264 }, { "epoch": 0.47, "grad_norm": 1.4135393395706835, "learning_rate": 5.806017504496055e-06, "loss": 0.6176, "step": 7265 }, { "epoch": 0.47, "grad_norm": 1.5384918959617921, "learning_rate": 5.804994488517922e-06, "loss": 0.767, "step": 7266 }, { "epoch": 0.47, "grad_norm": 1.6310500320543078, "learning_rate": 5.803971437942819e-06, "loss": 0.6844, "step": 7267 }, { "epoch": 0.47, "grad_norm": 1.5792414744527772, "learning_rate": 5.8029483528147136e-06, "loss": 0.6948, "step": 7268 }, { "epoch": 0.47, "grad_norm": 1.693224155871793, "learning_rate": 5.801925233177574e-06, "loss": 0.6578, "step": 7269 }, { "epoch": 0.47, "grad_norm": 1.249775566546125, "learning_rate": 5.8009020790753735e-06, "loss": 0.6279, "step": 7270 }, { "epoch": 0.47, "grad_norm": 1.8095405845950032, "learning_rate": 5.799878890552083e-06, "loss": 0.7378, "step": 7271 }, { "epoch": 0.47, "grad_norm": 1.4047611854647346, "learning_rate": 5.798855667651681e-06, "loss": 0.752, "step": 7272 }, { "epoch": 0.47, "grad_norm": 1.490795322967138, "learning_rate": 5.79783241041814e-06, "loss": 0.6792, "step": 7273 }, { "epoch": 0.47, "grad_norm": 1.812232440824185, "learning_rate": 5.796809118895437e-06, "loss": 0.6903, "step": 7274 }, { "epoch": 0.47, "grad_norm": 1.4966062755096459, "learning_rate": 5.795785793127554e-06, "loss": 0.7602, "step": 7275 }, { "epoch": 0.47, "grad_norm": 2.0730590927141685, "learning_rate": 5.794762433158469e-06, "loss": 0.6481, "step": 7276 }, { "epoch": 0.47, "grad_norm": 1.640978575182762, "learning_rate": 5.793739039032166e-06, "loss": 0.6596, "step": 7277 }, { "epoch": 0.47, "grad_norm": 1.674225980494881, "learning_rate": 5.7927156107926264e-06, "loss": 0.7022, "step": 7278 }, { "epoch": 0.47, "grad_norm": 4.562368360006823, "learning_rate": 5.791692148483834e-06, "loss": 0.6566, "step": 7279 }, { "epoch": 0.47, "grad_norm": 1.4049919367966983, "learning_rate": 5.790668652149778e-06, "loss": 0.7141, "step": 7280 }, { "epoch": 0.47, "grad_norm": 1.5583693807897165, "learning_rate": 5.789645121834445e-06, "loss": 0.7551, "step": 7281 }, { "epoch": 0.47, "grad_norm": 1.4966342731170867, "learning_rate": 5.788621557581824e-06, "loss": 0.7213, "step": 7282 }, { "epoch": 0.47, "grad_norm": 1.5043365073046917, "learning_rate": 5.7875979594359045e-06, "loss": 0.6469, "step": 7283 }, { "epoch": 0.47, "grad_norm": 1.5068129546266285, "learning_rate": 5.78657432744068e-06, "loss": 0.7302, "step": 7284 }, { "epoch": 0.47, "grad_norm": 1.4654236905504747, "learning_rate": 5.785550661640145e-06, "loss": 0.7359, "step": 7285 }, { "epoch": 0.47, "grad_norm": 1.4428632283415108, "learning_rate": 5.784526962078292e-06, "loss": 0.7326, "step": 7286 }, { "epoch": 0.47, "grad_norm": 1.7722971475527411, "learning_rate": 5.783503228799119e-06, "loss": 0.6726, "step": 7287 }, { "epoch": 0.47, "grad_norm": 1.4360073691578388, "learning_rate": 5.782479461846624e-06, "loss": 0.8002, "step": 7288 }, { "epoch": 0.47, "grad_norm": 1.666746155698749, "learning_rate": 5.781455661264805e-06, "loss": 0.602, "step": 7289 }, { "epoch": 0.47, "grad_norm": 1.4134524911932644, "learning_rate": 5.7804318270976655e-06, "loss": 0.6848, "step": 7290 }, { "epoch": 0.47, "grad_norm": 1.8069059055308436, "learning_rate": 5.779407959389205e-06, "loss": 0.7361, "step": 7291 }, { "epoch": 0.47, "grad_norm": 1.4244283464538567, "learning_rate": 5.778384058183426e-06, "loss": 0.7154, "step": 7292 }, { "epoch": 0.47, "grad_norm": 1.7282586005032183, "learning_rate": 5.777360123524338e-06, "loss": 0.712, "step": 7293 }, { "epoch": 0.47, "grad_norm": 1.5120264618962438, "learning_rate": 5.776336155455945e-06, "loss": 0.6639, "step": 7294 }, { "epoch": 0.47, "grad_norm": 1.4580327758054208, "learning_rate": 5.775312154022256e-06, "loss": 0.6514, "step": 7295 }, { "epoch": 0.47, "grad_norm": 1.7427266882747912, "learning_rate": 5.774288119267279e-06, "loss": 0.7419, "step": 7296 }, { "epoch": 0.47, "grad_norm": 1.4664633679113936, "learning_rate": 5.773264051235026e-06, "loss": 0.6448, "step": 7297 }, { "epoch": 0.47, "grad_norm": 1.6332030720562374, "learning_rate": 5.772239949969509e-06, "loss": 0.7575, "step": 7298 }, { "epoch": 0.47, "grad_norm": 1.0234952130136041, "learning_rate": 5.771215815514741e-06, "loss": 0.6759, "step": 7299 }, { "epoch": 0.47, "grad_norm": 1.414701626012287, "learning_rate": 5.770191647914738e-06, "loss": 0.6344, "step": 7300 }, { "epoch": 0.47, "grad_norm": 2.015136355086989, "learning_rate": 5.7691674472135175e-06, "loss": 0.6807, "step": 7301 }, { "epoch": 0.47, "grad_norm": 1.6132847461983555, "learning_rate": 5.768143213455094e-06, "loss": 0.6754, "step": 7302 }, { "epoch": 0.47, "grad_norm": 2.493934703774163, "learning_rate": 5.767118946683491e-06, "loss": 0.7775, "step": 7303 }, { "epoch": 0.47, "grad_norm": 1.6538183323163702, "learning_rate": 5.766094646942728e-06, "loss": 0.7007, "step": 7304 }, { "epoch": 0.47, "grad_norm": 1.53347011148948, "learning_rate": 5.765070314276826e-06, "loss": 0.6651, "step": 7305 }, { "epoch": 0.47, "grad_norm": 1.7165993548296008, "learning_rate": 5.764045948729809e-06, "loss": 0.6324, "step": 7306 }, { "epoch": 0.47, "grad_norm": 1.3930952507648158, "learning_rate": 5.763021550345703e-06, "loss": 0.7276, "step": 7307 }, { "epoch": 0.47, "grad_norm": 1.559177767344299, "learning_rate": 5.761997119168535e-06, "loss": 0.6596, "step": 7308 }, { "epoch": 0.47, "grad_norm": 1.5655789472072075, "learning_rate": 5.760972655242332e-06, "loss": 0.7175, "step": 7309 }, { "epoch": 0.47, "grad_norm": 1.6535084250022298, "learning_rate": 5.7599481586111225e-06, "loss": 0.8497, "step": 7310 }, { "epoch": 0.47, "grad_norm": 1.4560117033663862, "learning_rate": 5.758923629318938e-06, "loss": 0.7066, "step": 7311 }, { "epoch": 0.47, "grad_norm": 1.7323426459976727, "learning_rate": 5.757899067409811e-06, "loss": 0.7609, "step": 7312 }, { "epoch": 0.47, "grad_norm": 1.474364778813526, "learning_rate": 5.756874472927775e-06, "loss": 0.5682, "step": 7313 }, { "epoch": 0.47, "grad_norm": 1.3577500530868392, "learning_rate": 5.7558498459168635e-06, "loss": 0.7176, "step": 7314 }, { "epoch": 0.47, "grad_norm": 1.6823274896550013, "learning_rate": 5.7548251864211135e-06, "loss": 0.7448, "step": 7315 }, { "epoch": 0.47, "grad_norm": 1.662839101811978, "learning_rate": 5.753800494484565e-06, "loss": 0.73, "step": 7316 }, { "epoch": 0.47, "grad_norm": 1.5865324152608036, "learning_rate": 5.752775770151254e-06, "loss": 0.7244, "step": 7317 }, { "epoch": 0.47, "grad_norm": 1.1325367509169613, "learning_rate": 5.751751013465223e-06, "loss": 0.565, "step": 7318 }, { "epoch": 0.47, "grad_norm": 1.7985970642043423, "learning_rate": 5.750726224470513e-06, "loss": 0.7294, "step": 7319 }, { "epoch": 0.47, "grad_norm": 1.5053212066078265, "learning_rate": 5.749701403211166e-06, "loss": 0.7453, "step": 7320 }, { "epoch": 0.47, "grad_norm": 1.031797254288639, "learning_rate": 5.748676549731229e-06, "loss": 0.6357, "step": 7321 }, { "epoch": 0.47, "grad_norm": 1.5631684697255415, "learning_rate": 5.747651664074746e-06, "loss": 0.6836, "step": 7322 }, { "epoch": 0.47, "grad_norm": 1.597067233015891, "learning_rate": 5.746626746285767e-06, "loss": 0.6846, "step": 7323 }, { "epoch": 0.47, "grad_norm": 1.4320443758926682, "learning_rate": 5.745601796408339e-06, "loss": 0.7317, "step": 7324 }, { "epoch": 0.47, "grad_norm": 1.6970454034842513, "learning_rate": 5.744576814486512e-06, "loss": 0.6418, "step": 7325 }, { "epoch": 0.47, "grad_norm": 1.2052099889064989, "learning_rate": 5.74355180056434e-06, "loss": 0.7374, "step": 7326 }, { "epoch": 0.47, "grad_norm": 1.680778916563757, "learning_rate": 5.742526754685872e-06, "loss": 0.6532, "step": 7327 }, { "epoch": 0.47, "grad_norm": 1.5981563773573393, "learning_rate": 5.741501676895164e-06, "loss": 0.6481, "step": 7328 }, { "epoch": 0.47, "grad_norm": 1.4230404543296846, "learning_rate": 5.740476567236273e-06, "loss": 0.6638, "step": 7329 }, { "epoch": 0.47, "grad_norm": 1.1913971620647321, "learning_rate": 5.739451425753254e-06, "loss": 0.7263, "step": 7330 }, { "epoch": 0.47, "grad_norm": 1.5959002032453151, "learning_rate": 5.738426252490168e-06, "loss": 0.7847, "step": 7331 }, { "epoch": 0.47, "grad_norm": 1.33238155201015, "learning_rate": 5.737401047491074e-06, "loss": 0.6628, "step": 7332 }, { "epoch": 0.47, "grad_norm": 2.036822314543693, "learning_rate": 5.73637581080003e-06, "loss": 0.6522, "step": 7333 }, { "epoch": 0.47, "grad_norm": 1.5582041723135533, "learning_rate": 5.735350542461103e-06, "loss": 0.7088, "step": 7334 }, { "epoch": 0.47, "grad_norm": 1.10987262662336, "learning_rate": 5.734325242518355e-06, "loss": 0.7061, "step": 7335 }, { "epoch": 0.47, "grad_norm": 1.582348658430423, "learning_rate": 5.733299911015848e-06, "loss": 0.8171, "step": 7336 }, { "epoch": 0.47, "grad_norm": 1.4543250535962486, "learning_rate": 5.732274547997655e-06, "loss": 0.6076, "step": 7337 }, { "epoch": 0.47, "grad_norm": 1.5587570397994228, "learning_rate": 5.73124915350784e-06, "loss": 0.6161, "step": 7338 }, { "epoch": 0.47, "grad_norm": 1.7616628242611272, "learning_rate": 5.730223727590472e-06, "loss": 0.5793, "step": 7339 }, { "epoch": 0.47, "grad_norm": 1.441803138591793, "learning_rate": 5.7291982702896234e-06, "loss": 0.6742, "step": 7340 }, { "epoch": 0.47, "grad_norm": 0.9790456727468763, "learning_rate": 5.728172781649365e-06, "loss": 0.682, "step": 7341 }, { "epoch": 0.47, "grad_norm": 1.4061861256566104, "learning_rate": 5.72714726171377e-06, "loss": 0.705, "step": 7342 }, { "epoch": 0.47, "grad_norm": 1.4995720407828401, "learning_rate": 5.726121710526914e-06, "loss": 0.5665, "step": 7343 }, { "epoch": 0.47, "grad_norm": 1.5328741782982058, "learning_rate": 5.725096128132873e-06, "loss": 0.7521, "step": 7344 }, { "epoch": 0.47, "grad_norm": 2.0910324055829217, "learning_rate": 5.7240705145757244e-06, "loss": 0.7246, "step": 7345 }, { "epoch": 0.47, "grad_norm": 1.5012706631549302, "learning_rate": 5.7230448698995466e-06, "loss": 0.7204, "step": 7346 }, { "epoch": 0.47, "grad_norm": 1.4974293720686331, "learning_rate": 5.722019194148419e-06, "loss": 0.5987, "step": 7347 }, { "epoch": 0.47, "grad_norm": 1.6341681283688387, "learning_rate": 5.7209934873664245e-06, "loss": 0.7318, "step": 7348 }, { "epoch": 0.47, "grad_norm": 2.082663838733094, "learning_rate": 5.719967749597644e-06, "loss": 0.7027, "step": 7349 }, { "epoch": 0.47, "grad_norm": 1.6473087859240818, "learning_rate": 5.718941980886162e-06, "loss": 0.7009, "step": 7350 }, { "epoch": 0.47, "grad_norm": 1.512625756868999, "learning_rate": 5.717916181276065e-06, "loss": 0.6345, "step": 7351 }, { "epoch": 0.47, "grad_norm": 1.385042096949713, "learning_rate": 5.716890350811439e-06, "loss": 0.7087, "step": 7352 }, { "epoch": 0.47, "grad_norm": 1.6344718587862797, "learning_rate": 5.715864489536373e-06, "loss": 0.7147, "step": 7353 }, { "epoch": 0.47, "grad_norm": 1.4350004606956241, "learning_rate": 5.714838597494955e-06, "loss": 0.6729, "step": 7354 }, { "epoch": 0.47, "grad_norm": 1.3691932901183177, "learning_rate": 5.713812674731276e-06, "loss": 0.6417, "step": 7355 }, { "epoch": 0.47, "grad_norm": 1.5348100790707369, "learning_rate": 5.712786721289428e-06, "loss": 0.665, "step": 7356 }, { "epoch": 0.47, "grad_norm": 1.5943773205681975, "learning_rate": 5.711760737213504e-06, "loss": 0.7827, "step": 7357 }, { "epoch": 0.47, "grad_norm": 1.3356193191091714, "learning_rate": 5.7107347225476e-06, "loss": 0.6428, "step": 7358 }, { "epoch": 0.47, "grad_norm": 1.4298821838860205, "learning_rate": 5.70970867733581e-06, "loss": 0.6734, "step": 7359 }, { "epoch": 0.47, "grad_norm": 1.640604141101726, "learning_rate": 5.708682601622233e-06, "loss": 0.6619, "step": 7360 }, { "epoch": 0.47, "grad_norm": 1.481180759650316, "learning_rate": 5.707656495450969e-06, "loss": 0.7282, "step": 7361 }, { "epoch": 0.47, "grad_norm": 1.6933005947214055, "learning_rate": 5.706630358866113e-06, "loss": 0.6895, "step": 7362 }, { "epoch": 0.47, "grad_norm": 1.2726789938718337, "learning_rate": 5.70560419191177e-06, "loss": 0.647, "step": 7363 }, { "epoch": 0.47, "grad_norm": 1.7082187610250044, "learning_rate": 5.7045779946320416e-06, "loss": 0.8158, "step": 7364 }, { "epoch": 0.47, "grad_norm": 1.3635708777936997, "learning_rate": 5.7035517670710304e-06, "loss": 0.6729, "step": 7365 }, { "epoch": 0.47, "grad_norm": 1.3056836253116775, "learning_rate": 5.702525509272845e-06, "loss": 0.67, "step": 7366 }, { "epoch": 0.47, "grad_norm": 1.3557889626839223, "learning_rate": 5.701499221281587e-06, "loss": 0.6091, "step": 7367 }, { "epoch": 0.47, "grad_norm": 1.7205872524664767, "learning_rate": 5.700472903141368e-06, "loss": 0.6565, "step": 7368 }, { "epoch": 0.47, "grad_norm": 1.6500360423405107, "learning_rate": 5.699446554896294e-06, "loss": 0.6782, "step": 7369 }, { "epoch": 0.47, "grad_norm": 1.5986099910903253, "learning_rate": 5.698420176590479e-06, "loss": 0.7027, "step": 7370 }, { "epoch": 0.47, "grad_norm": 1.591763512840065, "learning_rate": 5.697393768268031e-06, "loss": 0.6629, "step": 7371 }, { "epoch": 0.47, "grad_norm": 1.4726608149504876, "learning_rate": 5.696367329973064e-06, "loss": 0.6786, "step": 7372 }, { "epoch": 0.47, "grad_norm": 1.4065038009462372, "learning_rate": 5.695340861749693e-06, "loss": 0.6281, "step": 7373 }, { "epoch": 0.47, "grad_norm": 2.2244735432400597, "learning_rate": 5.694314363642033e-06, "loss": 0.6446, "step": 7374 }, { "epoch": 0.47, "grad_norm": 1.1412227622607802, "learning_rate": 5.6932878356942005e-06, "loss": 0.7267, "step": 7375 }, { "epoch": 0.47, "grad_norm": 1.107457457804259, "learning_rate": 5.692261277950313e-06, "loss": 0.6952, "step": 7376 }, { "epoch": 0.47, "grad_norm": 0.9700593030814113, "learning_rate": 5.691234690454491e-06, "loss": 0.7267, "step": 7377 }, { "epoch": 0.47, "grad_norm": 1.3490413396404954, "learning_rate": 5.690208073250855e-06, "loss": 0.6811, "step": 7378 }, { "epoch": 0.47, "grad_norm": 1.606410112670841, "learning_rate": 5.6891814263835245e-06, "loss": 0.6987, "step": 7379 }, { "epoch": 0.47, "grad_norm": 1.7418884639379824, "learning_rate": 5.688154749896626e-06, "loss": 0.7377, "step": 7380 }, { "epoch": 0.47, "grad_norm": 1.1591884625441014, "learning_rate": 5.687128043834282e-06, "loss": 0.6591, "step": 7381 }, { "epoch": 0.47, "grad_norm": 1.5691226393683502, "learning_rate": 5.686101308240619e-06, "loss": 0.7442, "step": 7382 }, { "epoch": 0.47, "grad_norm": 1.5394425173057869, "learning_rate": 5.685074543159763e-06, "loss": 0.6865, "step": 7383 }, { "epoch": 0.47, "grad_norm": 1.5652751092480743, "learning_rate": 5.684047748635844e-06, "loss": 0.707, "step": 7384 }, { "epoch": 0.47, "grad_norm": 1.5085868774371791, "learning_rate": 5.683020924712987e-06, "loss": 0.668, "step": 7385 }, { "epoch": 0.47, "grad_norm": 1.6639560535056488, "learning_rate": 5.681994071435327e-06, "loss": 0.6917, "step": 7386 }, { "epoch": 0.47, "grad_norm": 1.6100554613871108, "learning_rate": 5.680967188846996e-06, "loss": 0.7142, "step": 7387 }, { "epoch": 0.47, "grad_norm": 1.5097126416701727, "learning_rate": 5.679940276992127e-06, "loss": 0.6093, "step": 7388 }, { "epoch": 0.47, "grad_norm": 1.491639037443991, "learning_rate": 5.678913335914852e-06, "loss": 0.682, "step": 7389 }, { "epoch": 0.47, "grad_norm": 1.737174917223038, "learning_rate": 5.67788636565931e-06, "loss": 0.6417, "step": 7390 }, { "epoch": 0.47, "grad_norm": 1.8612570187581066, "learning_rate": 5.676859366269635e-06, "loss": 0.6404, "step": 7391 }, { "epoch": 0.47, "grad_norm": 1.5929743339879772, "learning_rate": 5.675832337789967e-06, "loss": 0.6268, "step": 7392 }, { "epoch": 0.47, "grad_norm": 1.5178727836417067, "learning_rate": 5.6748052802644445e-06, "loss": 0.6829, "step": 7393 }, { "epoch": 0.47, "grad_norm": 1.498993493661501, "learning_rate": 5.67377819373721e-06, "loss": 0.674, "step": 7394 }, { "epoch": 0.47, "grad_norm": 1.4554076952151247, "learning_rate": 5.672751078252403e-06, "loss": 0.7153, "step": 7395 }, { "epoch": 0.47, "grad_norm": 1.6911927186819082, "learning_rate": 5.671723933854171e-06, "loss": 0.6586, "step": 7396 }, { "epoch": 0.47, "grad_norm": 1.4775369624702048, "learning_rate": 5.670696760586654e-06, "loss": 0.6939, "step": 7397 }, { "epoch": 0.47, "grad_norm": 1.64873003995294, "learning_rate": 5.669669558494e-06, "loss": 0.6506, "step": 7398 }, { "epoch": 0.47, "grad_norm": 1.6418325317874576, "learning_rate": 5.668642327620356e-06, "loss": 0.7462, "step": 7399 }, { "epoch": 0.47, "grad_norm": 1.473269566266316, "learning_rate": 5.6676150680098685e-06, "loss": 0.6532, "step": 7400 }, { "epoch": 0.47, "grad_norm": 1.604972194168905, "learning_rate": 5.66658777970669e-06, "loss": 0.6686, "step": 7401 }, { "epoch": 0.47, "grad_norm": 1.6565799831538242, "learning_rate": 5.665560462754968e-06, "loss": 0.7509, "step": 7402 }, { "epoch": 0.47, "grad_norm": 1.4845683753790233, "learning_rate": 5.664533117198856e-06, "loss": 0.6699, "step": 7403 }, { "epoch": 0.47, "grad_norm": 1.736525968222349, "learning_rate": 5.663505743082507e-06, "loss": 0.7848, "step": 7404 }, { "epoch": 0.47, "grad_norm": 0.9638390069562659, "learning_rate": 5.662478340450076e-06, "loss": 0.5861, "step": 7405 }, { "epoch": 0.47, "grad_norm": 1.5109366925006582, "learning_rate": 5.661450909345718e-06, "loss": 0.6912, "step": 7406 }, { "epoch": 0.47, "grad_norm": 1.6424130767333267, "learning_rate": 5.660423449813588e-06, "loss": 0.7791, "step": 7407 }, { "epoch": 0.47, "grad_norm": 1.5152337347098648, "learning_rate": 5.659395961897848e-06, "loss": 0.6656, "step": 7408 }, { "epoch": 0.47, "grad_norm": 1.542019544413736, "learning_rate": 5.658368445642655e-06, "loss": 0.8279, "step": 7409 }, { "epoch": 0.47, "grad_norm": 1.6736336448122942, "learning_rate": 5.657340901092169e-06, "loss": 0.7368, "step": 7410 }, { "epoch": 0.47, "grad_norm": 1.6116391785926831, "learning_rate": 5.656313328290552e-06, "loss": 0.703, "step": 7411 }, { "epoch": 0.47, "grad_norm": 1.5008771618699437, "learning_rate": 5.655285727281968e-06, "loss": 0.6529, "step": 7412 }, { "epoch": 0.47, "grad_norm": 1.3760271680306082, "learning_rate": 5.65425809811058e-06, "loss": 0.6571, "step": 7413 }, { "epoch": 0.47, "grad_norm": 1.7479421197812206, "learning_rate": 5.653230440820554e-06, "loss": 0.7919, "step": 7414 }, { "epoch": 0.47, "grad_norm": 1.2050130569922484, "learning_rate": 5.652202755456055e-06, "loss": 0.6807, "step": 7415 }, { "epoch": 0.47, "grad_norm": 1.386658857809081, "learning_rate": 5.651175042061254e-06, "loss": 0.7028, "step": 7416 }, { "epoch": 0.47, "grad_norm": 2.2113064955696924, "learning_rate": 5.650147300680318e-06, "loss": 0.6992, "step": 7417 }, { "epoch": 0.47, "grad_norm": 1.0570613884416746, "learning_rate": 5.649119531357415e-06, "loss": 0.7608, "step": 7418 }, { "epoch": 0.47, "grad_norm": 1.6694520064472793, "learning_rate": 5.6480917341367205e-06, "loss": 0.6919, "step": 7419 }, { "epoch": 0.47, "grad_norm": 0.9926641791566139, "learning_rate": 5.647063909062404e-06, "loss": 0.7003, "step": 7420 }, { "epoch": 0.47, "grad_norm": 1.6771513139140868, "learning_rate": 5.646036056178641e-06, "loss": 0.6649, "step": 7421 }, { "epoch": 0.48, "grad_norm": 1.8298363551765198, "learning_rate": 5.645008175529605e-06, "loss": 0.7226, "step": 7422 }, { "epoch": 0.48, "grad_norm": 1.6857675558006824, "learning_rate": 5.643980267159474e-06, "loss": 0.728, "step": 7423 }, { "epoch": 0.48, "grad_norm": 1.5082653091485314, "learning_rate": 5.642952331112423e-06, "loss": 0.7943, "step": 7424 }, { "epoch": 0.48, "grad_norm": 1.5033772271979047, "learning_rate": 5.641924367432634e-06, "loss": 0.8082, "step": 7425 }, { "epoch": 0.48, "grad_norm": 1.4947835637386233, "learning_rate": 5.640896376164283e-06, "loss": 0.6384, "step": 7426 }, { "epoch": 0.48, "grad_norm": 1.6114300495367113, "learning_rate": 5.639868357351554e-06, "loss": 0.6846, "step": 7427 }, { "epoch": 0.48, "grad_norm": 1.5320970076499054, "learning_rate": 5.638840311038625e-06, "loss": 0.7151, "step": 7428 }, { "epoch": 0.48, "grad_norm": 1.4569266452710534, "learning_rate": 5.637812237269683e-06, "loss": 0.6686, "step": 7429 }, { "epoch": 0.48, "grad_norm": 1.4662225029939904, "learning_rate": 5.6367841360889105e-06, "loss": 0.5816, "step": 7430 }, { "epoch": 0.48, "grad_norm": 1.850976520183996, "learning_rate": 5.635756007540495e-06, "loss": 0.656, "step": 7431 }, { "epoch": 0.48, "grad_norm": 1.1690958931310382, "learning_rate": 5.634727851668622e-06, "loss": 0.6551, "step": 7432 }, { "epoch": 0.48, "grad_norm": 1.4855108593292274, "learning_rate": 5.6336996685174795e-06, "loss": 0.6866, "step": 7433 }, { "epoch": 0.48, "grad_norm": 1.3937596076092058, "learning_rate": 5.632671458131258e-06, "loss": 0.7257, "step": 7434 }, { "epoch": 0.48, "grad_norm": 1.8111845014518335, "learning_rate": 5.631643220554146e-06, "loss": 0.7244, "step": 7435 }, { "epoch": 0.48, "grad_norm": 1.0412001298543296, "learning_rate": 5.630614955830334e-06, "loss": 0.6378, "step": 7436 }, { "epoch": 0.48, "grad_norm": 1.6961214486223994, "learning_rate": 5.629586664004018e-06, "loss": 0.7051, "step": 7437 }, { "epoch": 0.48, "grad_norm": 1.4706996499507086, "learning_rate": 5.628558345119389e-06, "loss": 0.634, "step": 7438 }, { "epoch": 0.48, "grad_norm": 1.511393827147841, "learning_rate": 5.6275299992206444e-06, "loss": 0.6802, "step": 7439 }, { "epoch": 0.48, "grad_norm": 1.5442998960933103, "learning_rate": 5.626501626351979e-06, "loss": 0.7187, "step": 7440 }, { "epoch": 0.48, "grad_norm": 1.6134188535526908, "learning_rate": 5.625473226557588e-06, "loss": 0.6826, "step": 7441 }, { "epoch": 0.48, "grad_norm": 1.2592775365373654, "learning_rate": 5.624444799881674e-06, "loss": 0.6553, "step": 7442 }, { "epoch": 0.48, "grad_norm": 1.393380109333799, "learning_rate": 5.623416346368434e-06, "loss": 0.7224, "step": 7443 }, { "epoch": 0.48, "grad_norm": 1.5227264752557257, "learning_rate": 5.62238786606207e-06, "loss": 0.6946, "step": 7444 }, { "epoch": 0.48, "grad_norm": 1.3863632784800735, "learning_rate": 5.621359359006784e-06, "loss": 0.7121, "step": 7445 }, { "epoch": 0.48, "grad_norm": 1.464698737814942, "learning_rate": 5.620330825246777e-06, "loss": 0.7393, "step": 7446 }, { "epoch": 0.48, "grad_norm": 1.0181390472979157, "learning_rate": 5.6193022648262555e-06, "loss": 0.6874, "step": 7447 }, { "epoch": 0.48, "grad_norm": 1.7214177511389142, "learning_rate": 5.618273677789424e-06, "loss": 0.6253, "step": 7448 }, { "epoch": 0.48, "grad_norm": 1.0357556011505682, "learning_rate": 5.617245064180488e-06, "loss": 0.6589, "step": 7449 }, { "epoch": 0.48, "grad_norm": 1.2538559927732165, "learning_rate": 5.616216424043657e-06, "loss": 0.6067, "step": 7450 }, { "epoch": 0.48, "grad_norm": 1.5895823760223173, "learning_rate": 5.615187757423137e-06, "loss": 0.6339, "step": 7451 }, { "epoch": 0.48, "grad_norm": 2.0887813668624085, "learning_rate": 5.614159064363142e-06, "loss": 0.6416, "step": 7452 }, { "epoch": 0.48, "grad_norm": 1.6387278635352334, "learning_rate": 5.613130344907881e-06, "loss": 0.7031, "step": 7453 }, { "epoch": 0.48, "grad_norm": 1.4632159865400316, "learning_rate": 5.612101599101566e-06, "loss": 0.7184, "step": 7454 }, { "epoch": 0.48, "grad_norm": 1.5793147681265518, "learning_rate": 5.611072826988409e-06, "loss": 0.6884, "step": 7455 }, { "epoch": 0.48, "grad_norm": 1.6220466874063757, "learning_rate": 5.610044028612628e-06, "loss": 0.7896, "step": 7456 }, { "epoch": 0.48, "grad_norm": 1.4832716909178536, "learning_rate": 5.609015204018435e-06, "loss": 0.6753, "step": 7457 }, { "epoch": 0.48, "grad_norm": 1.2350717235808917, "learning_rate": 5.607986353250049e-06, "loss": 0.6241, "step": 7458 }, { "epoch": 0.48, "grad_norm": 1.126237238372368, "learning_rate": 5.6069574763516866e-06, "loss": 0.6779, "step": 7459 }, { "epoch": 0.48, "grad_norm": 1.5191255041487381, "learning_rate": 5.605928573367568e-06, "loss": 0.6902, "step": 7460 }, { "epoch": 0.48, "grad_norm": 1.5645472486370136, "learning_rate": 5.604899644341911e-06, "loss": 0.6842, "step": 7461 }, { "epoch": 0.48, "grad_norm": 1.1314665945704658, "learning_rate": 5.60387068931894e-06, "loss": 0.6927, "step": 7462 }, { "epoch": 0.48, "grad_norm": 1.6958496479814795, "learning_rate": 5.602841708342876e-06, "loss": 0.7016, "step": 7463 }, { "epoch": 0.48, "grad_norm": 1.5378436990191198, "learning_rate": 5.6018127014579404e-06, "loss": 0.5915, "step": 7464 }, { "epoch": 0.48, "grad_norm": 1.4780388497951011, "learning_rate": 5.60078366870836e-06, "loss": 0.6262, "step": 7465 }, { "epoch": 0.48, "grad_norm": 1.8602191580619203, "learning_rate": 5.599754610138361e-06, "loss": 0.7537, "step": 7466 }, { "epoch": 0.48, "grad_norm": 1.1355384576499303, "learning_rate": 5.598725525792168e-06, "loss": 0.5676, "step": 7467 }, { "epoch": 0.48, "grad_norm": 1.565043366554734, "learning_rate": 5.59769641571401e-06, "loss": 0.6956, "step": 7468 }, { "epoch": 0.48, "grad_norm": 1.6708616151964732, "learning_rate": 5.596667279948117e-06, "loss": 0.7634, "step": 7469 }, { "epoch": 0.48, "grad_norm": 1.8121343630335083, "learning_rate": 5.595638118538718e-06, "loss": 0.7243, "step": 7470 }, { "epoch": 0.48, "grad_norm": 1.5948318306036917, "learning_rate": 5.594608931530044e-06, "loss": 0.7734, "step": 7471 }, { "epoch": 0.48, "grad_norm": 1.4680199241370482, "learning_rate": 5.593579718966325e-06, "loss": 0.5447, "step": 7472 }, { "epoch": 0.48, "grad_norm": 1.5824610894976059, "learning_rate": 5.592550480891801e-06, "loss": 0.6456, "step": 7473 }, { "epoch": 0.48, "grad_norm": 1.3499582052457768, "learning_rate": 5.5915212173507e-06, "loss": 0.6394, "step": 7474 }, { "epoch": 0.48, "grad_norm": 1.3808122276296928, "learning_rate": 5.590491928387261e-06, "loss": 0.6796, "step": 7475 }, { "epoch": 0.48, "grad_norm": 1.692122739592856, "learning_rate": 5.58946261404572e-06, "loss": 0.7121, "step": 7476 }, { "epoch": 0.48, "grad_norm": 1.5211771559440759, "learning_rate": 5.588433274370314e-06, "loss": 0.6803, "step": 7477 }, { "epoch": 0.48, "grad_norm": 1.3119451158562478, "learning_rate": 5.587403909405281e-06, "loss": 0.7289, "step": 7478 }, { "epoch": 0.48, "grad_norm": 1.5633694205668414, "learning_rate": 5.586374519194863e-06, "loss": 0.6855, "step": 7479 }, { "epoch": 0.48, "grad_norm": 1.5878598206762808, "learning_rate": 5.585345103783302e-06, "loss": 0.663, "step": 7480 }, { "epoch": 0.48, "grad_norm": 1.1795415881278983, "learning_rate": 5.584315663214836e-06, "loss": 0.7282, "step": 7481 }, { "epoch": 0.48, "grad_norm": 1.4716482478292667, "learning_rate": 5.583286197533713e-06, "loss": 0.5857, "step": 7482 }, { "epoch": 0.48, "grad_norm": 1.5200276787331315, "learning_rate": 5.582256706784174e-06, "loss": 0.6649, "step": 7483 }, { "epoch": 0.48, "grad_norm": 1.4087841676377142, "learning_rate": 5.581227191010465e-06, "loss": 0.7286, "step": 7484 }, { "epoch": 0.48, "grad_norm": 1.6081120697150215, "learning_rate": 5.580197650256832e-06, "loss": 0.7519, "step": 7485 }, { "epoch": 0.48, "grad_norm": 1.438764760124581, "learning_rate": 5.5791680845675245e-06, "loss": 0.6949, "step": 7486 }, { "epoch": 0.48, "grad_norm": 1.6841856705770109, "learning_rate": 5.578138493986788e-06, "loss": 0.6539, "step": 7487 }, { "epoch": 0.48, "grad_norm": 1.595186243707831, "learning_rate": 5.577108878558875e-06, "loss": 0.6582, "step": 7488 }, { "epoch": 0.48, "grad_norm": 1.5119284899534517, "learning_rate": 5.576079238328035e-06, "loss": 0.6969, "step": 7489 }, { "epoch": 0.48, "grad_norm": 1.5282779821481178, "learning_rate": 5.57504957333852e-06, "loss": 0.697, "step": 7490 }, { "epoch": 0.48, "grad_norm": 1.7424367258153408, "learning_rate": 5.574019883634582e-06, "loss": 0.7041, "step": 7491 }, { "epoch": 0.48, "grad_norm": 1.5916085828052284, "learning_rate": 5.572990169260477e-06, "loss": 0.7088, "step": 7492 }, { "epoch": 0.48, "grad_norm": 1.6821478990855603, "learning_rate": 5.571960430260457e-06, "loss": 0.7765, "step": 7493 }, { "epoch": 0.48, "grad_norm": 1.5807325030988968, "learning_rate": 5.570930666678781e-06, "loss": 0.702, "step": 7494 }, { "epoch": 0.48, "grad_norm": 1.0081326293541153, "learning_rate": 5.569900878559704e-06, "loss": 0.5778, "step": 7495 }, { "epoch": 0.48, "grad_norm": 1.4516776425872058, "learning_rate": 5.568871065947483e-06, "loss": 0.7046, "step": 7496 }, { "epoch": 0.48, "grad_norm": 1.477885035421031, "learning_rate": 5.567841228886381e-06, "loss": 0.6833, "step": 7497 }, { "epoch": 0.48, "grad_norm": 1.5559108755593891, "learning_rate": 5.566811367420656e-06, "loss": 0.7246, "step": 7498 }, { "epoch": 0.48, "grad_norm": 1.6297447005637542, "learning_rate": 5.56578148159457e-06, "loss": 0.7333, "step": 7499 }, { "epoch": 0.48, "grad_norm": 1.5257717768753893, "learning_rate": 5.564751571452383e-06, "loss": 0.7228, "step": 7500 }, { "epoch": 0.48, "grad_norm": 1.3697901557582102, "learning_rate": 5.5637216370383615e-06, "loss": 0.5734, "step": 7501 }, { "epoch": 0.48, "grad_norm": 1.3320224906499267, "learning_rate": 5.562691678396768e-06, "loss": 0.7142, "step": 7502 }, { "epoch": 0.48, "grad_norm": 1.4761487624424723, "learning_rate": 5.561661695571869e-06, "loss": 0.6141, "step": 7503 }, { "epoch": 0.48, "grad_norm": 1.5772711355106566, "learning_rate": 5.56063168860793e-06, "loss": 0.6937, "step": 7504 }, { "epoch": 0.48, "grad_norm": 1.6691950072301196, "learning_rate": 5.559601657549219e-06, "loss": 0.693, "step": 7505 }, { "epoch": 0.48, "grad_norm": 1.5213684347365233, "learning_rate": 5.558571602440006e-06, "loss": 0.7425, "step": 7506 }, { "epoch": 0.48, "grad_norm": 1.4523174018703051, "learning_rate": 5.557541523324558e-06, "loss": 0.7398, "step": 7507 }, { "epoch": 0.48, "grad_norm": 1.3697341429369623, "learning_rate": 5.556511420247146e-06, "loss": 0.7178, "step": 7508 }, { "epoch": 0.48, "grad_norm": 1.7710787478808032, "learning_rate": 5.555481293252044e-06, "loss": 0.7339, "step": 7509 }, { "epoch": 0.48, "grad_norm": 1.4901415274238374, "learning_rate": 5.554451142383524e-06, "loss": 0.6405, "step": 7510 }, { "epoch": 0.48, "grad_norm": 1.3897115356806433, "learning_rate": 5.553420967685857e-06, "loss": 0.6584, "step": 7511 }, { "epoch": 0.48, "grad_norm": 1.5335911036207737, "learning_rate": 5.552390769203322e-06, "loss": 0.7267, "step": 7512 }, { "epoch": 0.48, "grad_norm": 1.3866311825571518, "learning_rate": 5.551360546980191e-06, "loss": 0.72, "step": 7513 }, { "epoch": 0.48, "grad_norm": 1.4415816696796595, "learning_rate": 5.550330301060741e-06, "loss": 0.6543, "step": 7514 }, { "epoch": 0.48, "grad_norm": 1.761120329999675, "learning_rate": 5.549300031489252e-06, "loss": 0.7006, "step": 7515 }, { "epoch": 0.48, "grad_norm": 1.5357927944252465, "learning_rate": 5.548269738310002e-06, "loss": 0.6637, "step": 7516 }, { "epoch": 0.48, "grad_norm": 1.2380430762494816, "learning_rate": 5.547239421567272e-06, "loss": 0.6524, "step": 7517 }, { "epoch": 0.48, "grad_norm": 1.3789251633255666, "learning_rate": 5.546209081305341e-06, "loss": 0.6896, "step": 7518 }, { "epoch": 0.48, "grad_norm": 1.5505910884396514, "learning_rate": 5.545178717568491e-06, "loss": 0.7233, "step": 7519 }, { "epoch": 0.48, "grad_norm": 1.689071010707872, "learning_rate": 5.5441483304010055e-06, "loss": 0.6619, "step": 7520 }, { "epoch": 0.48, "grad_norm": 1.4592526631597713, "learning_rate": 5.5431179198471674e-06, "loss": 0.6842, "step": 7521 }, { "epoch": 0.48, "grad_norm": 1.3328098081498838, "learning_rate": 5.542087485951263e-06, "loss": 0.6256, "step": 7522 }, { "epoch": 0.48, "grad_norm": 1.6880418752283715, "learning_rate": 5.5410570287575775e-06, "loss": 0.7561, "step": 7523 }, { "epoch": 0.48, "grad_norm": 1.1860379538847257, "learning_rate": 5.540026548310397e-06, "loss": 0.5776, "step": 7524 }, { "epoch": 0.48, "grad_norm": 1.505687045581653, "learning_rate": 5.538996044654011e-06, "loss": 0.6817, "step": 7525 }, { "epoch": 0.48, "grad_norm": 1.4347796389613303, "learning_rate": 5.537965517832708e-06, "loss": 0.6966, "step": 7526 }, { "epoch": 0.48, "grad_norm": 1.540559624768805, "learning_rate": 5.536934967890778e-06, "loss": 0.6671, "step": 7527 }, { "epoch": 0.48, "grad_norm": 1.6532952933150842, "learning_rate": 5.53590439487251e-06, "loss": 0.64, "step": 7528 }, { "epoch": 0.48, "grad_norm": 1.5530980294136327, "learning_rate": 5.5348737988221964e-06, "loss": 0.5825, "step": 7529 }, { "epoch": 0.48, "grad_norm": 1.577809647722222, "learning_rate": 5.533843179784133e-06, "loss": 0.6271, "step": 7530 }, { "epoch": 0.48, "grad_norm": 1.4097816890991326, "learning_rate": 5.532812537802611e-06, "loss": 0.7293, "step": 7531 }, { "epoch": 0.48, "grad_norm": 1.593578924007422, "learning_rate": 5.5317818729219255e-06, "loss": 0.7345, "step": 7532 }, { "epoch": 0.48, "grad_norm": 1.8889326679958847, "learning_rate": 5.530751185186372e-06, "loss": 0.6368, "step": 7533 }, { "epoch": 0.48, "grad_norm": 2.0705034491895504, "learning_rate": 5.529720474640248e-06, "loss": 0.7165, "step": 7534 }, { "epoch": 0.48, "grad_norm": 1.6326052013411958, "learning_rate": 5.528689741327851e-06, "loss": 0.7765, "step": 7535 }, { "epoch": 0.48, "grad_norm": 1.7174159932387811, "learning_rate": 5.527658985293479e-06, "loss": 0.6614, "step": 7536 }, { "epoch": 0.48, "grad_norm": 1.1793576320809225, "learning_rate": 5.526628206581434e-06, "loss": 0.5944, "step": 7537 }, { "epoch": 0.48, "grad_norm": 1.2645143657172275, "learning_rate": 5.5255974052360155e-06, "loss": 0.604, "step": 7538 }, { "epoch": 0.48, "grad_norm": 1.7537808071001288, "learning_rate": 5.5245665813015245e-06, "loss": 0.6502, "step": 7539 }, { "epoch": 0.48, "grad_norm": 2.1613644224338517, "learning_rate": 5.523535734822264e-06, "loss": 0.6708, "step": 7540 }, { "epoch": 0.48, "grad_norm": 1.7139326025986676, "learning_rate": 5.522504865842537e-06, "loss": 0.7019, "step": 7541 }, { "epoch": 0.48, "grad_norm": 1.7936598878806043, "learning_rate": 5.521473974406649e-06, "loss": 0.7715, "step": 7542 }, { "epoch": 0.48, "grad_norm": 1.4852756816047235, "learning_rate": 5.520443060558907e-06, "loss": 0.7096, "step": 7543 }, { "epoch": 0.48, "grad_norm": 1.6994261169535094, "learning_rate": 5.519412124343614e-06, "loss": 0.6864, "step": 7544 }, { "epoch": 0.48, "grad_norm": 1.4795223914020044, "learning_rate": 5.51838116580508e-06, "loss": 0.6056, "step": 7545 }, { "epoch": 0.48, "grad_norm": 1.5446090587236716, "learning_rate": 5.517350184987613e-06, "loss": 0.7104, "step": 7546 }, { "epoch": 0.48, "grad_norm": 1.5125279040544806, "learning_rate": 5.5163191819355225e-06, "loss": 0.6743, "step": 7547 }, { "epoch": 0.48, "grad_norm": 1.4680153459003915, "learning_rate": 5.515288156693117e-06, "loss": 0.6808, "step": 7548 }, { "epoch": 0.48, "grad_norm": 1.69793665252804, "learning_rate": 5.514257109304712e-06, "loss": 0.6327, "step": 7549 }, { "epoch": 0.48, "grad_norm": 1.0357538745558337, "learning_rate": 5.513226039814613e-06, "loss": 0.7152, "step": 7550 }, { "epoch": 0.48, "grad_norm": 1.5894635728316107, "learning_rate": 5.51219494826714e-06, "loss": 0.6912, "step": 7551 }, { "epoch": 0.48, "grad_norm": 1.5529667758087708, "learning_rate": 5.511163834706603e-06, "loss": 0.6798, "step": 7552 }, { "epoch": 0.48, "grad_norm": 1.4798310387603162, "learning_rate": 5.51013269917732e-06, "loss": 0.7338, "step": 7553 }, { "epoch": 0.48, "grad_norm": 1.679311323410665, "learning_rate": 5.509101541723605e-06, "loss": 0.6857, "step": 7554 }, { "epoch": 0.48, "grad_norm": 1.7136411562256193, "learning_rate": 5.5080703623897754e-06, "loss": 0.7443, "step": 7555 }, { "epoch": 0.48, "grad_norm": 1.4316638443176624, "learning_rate": 5.50703916122015e-06, "loss": 0.5943, "step": 7556 }, { "epoch": 0.48, "grad_norm": 1.3286630582180328, "learning_rate": 5.506007938259045e-06, "loss": 0.6253, "step": 7557 }, { "epoch": 0.48, "grad_norm": 1.416566304992245, "learning_rate": 5.504976693550783e-06, "loss": 0.6801, "step": 7558 }, { "epoch": 0.48, "grad_norm": 1.3444689503336005, "learning_rate": 5.5039454271396856e-06, "loss": 0.7014, "step": 7559 }, { "epoch": 0.48, "grad_norm": 1.1658992092012943, "learning_rate": 5.50291413907007e-06, "loss": 0.6966, "step": 7560 }, { "epoch": 0.48, "grad_norm": 0.9805293242448765, "learning_rate": 5.501882829386262e-06, "loss": 0.6591, "step": 7561 }, { "epoch": 0.48, "grad_norm": 1.4078336911981906, "learning_rate": 5.500851498132585e-06, "loss": 0.6623, "step": 7562 }, { "epoch": 0.48, "grad_norm": 1.135824841266319, "learning_rate": 5.499820145353364e-06, "loss": 0.742, "step": 7563 }, { "epoch": 0.48, "grad_norm": 1.455788288923803, "learning_rate": 5.4987887710929235e-06, "loss": 0.7024, "step": 7564 }, { "epoch": 0.48, "grad_norm": 1.7787995024378238, "learning_rate": 5.497757375395588e-06, "loss": 0.6217, "step": 7565 }, { "epoch": 0.48, "grad_norm": 1.5397056431388345, "learning_rate": 5.496725958305687e-06, "loss": 0.673, "step": 7566 }, { "epoch": 0.48, "grad_norm": 1.872141986813212, "learning_rate": 5.49569451986755e-06, "loss": 0.7273, "step": 7567 }, { "epoch": 0.48, "grad_norm": 3.728856909166895, "learning_rate": 5.494663060125501e-06, "loss": 0.7512, "step": 7568 }, { "epoch": 0.48, "grad_norm": 1.0571993067732124, "learning_rate": 5.493631579123876e-06, "loss": 0.7513, "step": 7569 }, { "epoch": 0.48, "grad_norm": 1.2974751662485584, "learning_rate": 5.492600076907e-06, "loss": 0.6594, "step": 7570 }, { "epoch": 0.48, "grad_norm": 1.5308331544374152, "learning_rate": 5.49156855351921e-06, "loss": 0.646, "step": 7571 }, { "epoch": 0.48, "grad_norm": 1.440219448217697, "learning_rate": 5.490537009004835e-06, "loss": 0.6422, "step": 7572 }, { "epoch": 0.48, "grad_norm": 1.1348192773154964, "learning_rate": 5.4895054434082115e-06, "loss": 0.7027, "step": 7573 }, { "epoch": 0.48, "grad_norm": 1.5461028251645763, "learning_rate": 5.4884738567736715e-06, "loss": 0.7315, "step": 7574 }, { "epoch": 0.48, "grad_norm": 1.4915011357271681, "learning_rate": 5.487442249145554e-06, "loss": 0.6707, "step": 7575 }, { "epoch": 0.48, "grad_norm": 1.3766246620553784, "learning_rate": 5.48641062056819e-06, "loss": 0.7115, "step": 7576 }, { "epoch": 0.48, "grad_norm": 1.6621568829486884, "learning_rate": 5.485378971085921e-06, "loss": 0.5899, "step": 7577 }, { "epoch": 0.49, "grad_norm": 1.4752673824126674, "learning_rate": 5.484347300743083e-06, "loss": 0.7769, "step": 7578 }, { "epoch": 0.49, "grad_norm": 1.6141475379990804, "learning_rate": 5.4833156095840155e-06, "loss": 0.7911, "step": 7579 }, { "epoch": 0.49, "grad_norm": 1.6280350571703195, "learning_rate": 5.48228389765306e-06, "loss": 0.6808, "step": 7580 }, { "epoch": 0.49, "grad_norm": 1.5543918086498956, "learning_rate": 5.481252164994555e-06, "loss": 0.7378, "step": 7581 }, { "epoch": 0.49, "grad_norm": 1.7518922389504754, "learning_rate": 5.480220411652845e-06, "loss": 0.7166, "step": 7582 }, { "epoch": 0.49, "grad_norm": 1.243717001363495, "learning_rate": 5.479188637672269e-06, "loss": 0.6654, "step": 7583 }, { "epoch": 0.49, "grad_norm": 1.346805207765154, "learning_rate": 5.478156843097173e-06, "loss": 0.6561, "step": 7584 }, { "epoch": 0.49, "grad_norm": 1.4909408437501965, "learning_rate": 5.4771250279719e-06, "loss": 0.6848, "step": 7585 }, { "epoch": 0.49, "grad_norm": 1.5538437887188066, "learning_rate": 5.476093192340796e-06, "loss": 0.6927, "step": 7586 }, { "epoch": 0.49, "grad_norm": 1.482513521561725, "learning_rate": 5.475061336248208e-06, "loss": 0.6659, "step": 7587 }, { "epoch": 0.49, "grad_norm": 1.392004113742637, "learning_rate": 5.4740294597384804e-06, "loss": 0.5811, "step": 7588 }, { "epoch": 0.49, "grad_norm": 1.1830328143548874, "learning_rate": 5.4729975628559626e-06, "loss": 0.6774, "step": 7589 }, { "epoch": 0.49, "grad_norm": 1.741622973180868, "learning_rate": 5.471965645645005e-06, "loss": 0.6591, "step": 7590 }, { "epoch": 0.49, "grad_norm": 1.714126336968801, "learning_rate": 5.470933708149955e-06, "loss": 0.6114, "step": 7591 }, { "epoch": 0.49, "grad_norm": 1.5062886248620124, "learning_rate": 5.4699017504151644e-06, "loss": 0.6969, "step": 7592 }, { "epoch": 0.49, "grad_norm": 1.2238896289535859, "learning_rate": 5.468869772484982e-06, "loss": 0.7068, "step": 7593 }, { "epoch": 0.49, "grad_norm": 1.4981162037108844, "learning_rate": 5.467837774403763e-06, "loss": 0.6325, "step": 7594 }, { "epoch": 0.49, "grad_norm": 1.7616093022219983, "learning_rate": 5.4668057562158604e-06, "loss": 0.7489, "step": 7595 }, { "epoch": 0.49, "grad_norm": 1.8931000253592618, "learning_rate": 5.465773717965628e-06, "loss": 0.761, "step": 7596 }, { "epoch": 0.49, "grad_norm": 1.6325158109625246, "learning_rate": 5.464741659697419e-06, "loss": 0.789, "step": 7597 }, { "epoch": 0.49, "grad_norm": 1.448501777282572, "learning_rate": 5.463709581455588e-06, "loss": 0.7135, "step": 7598 }, { "epoch": 0.49, "grad_norm": 1.4711799323563208, "learning_rate": 5.462677483284496e-06, "loss": 0.7515, "step": 7599 }, { "epoch": 0.49, "grad_norm": 1.7499913069196664, "learning_rate": 5.461645365228496e-06, "loss": 0.7271, "step": 7600 }, { "epoch": 0.49, "grad_norm": 1.4838306934763779, "learning_rate": 5.46061322733195e-06, "loss": 0.7309, "step": 7601 }, { "epoch": 0.49, "grad_norm": 1.6404898311607976, "learning_rate": 5.459581069639215e-06, "loss": 0.7383, "step": 7602 }, { "epoch": 0.49, "grad_norm": 1.5293540835219999, "learning_rate": 5.458548892194652e-06, "loss": 0.7678, "step": 7603 }, { "epoch": 0.49, "grad_norm": 1.5875868634599712, "learning_rate": 5.457516695042621e-06, "loss": 0.7317, "step": 7604 }, { "epoch": 0.49, "grad_norm": 1.575363909071849, "learning_rate": 5.4564844782274826e-06, "loss": 0.6874, "step": 7605 }, { "epoch": 0.49, "grad_norm": 1.5543301434933428, "learning_rate": 5.455452241793602e-06, "loss": 0.7348, "step": 7606 }, { "epoch": 0.49, "grad_norm": 1.9304987228948018, "learning_rate": 5.45441998578534e-06, "loss": 0.6617, "step": 7607 }, { "epoch": 0.49, "grad_norm": 1.4800688596660971, "learning_rate": 5.4533877102470616e-06, "loss": 0.6983, "step": 7608 }, { "epoch": 0.49, "grad_norm": 1.6720368512436081, "learning_rate": 5.452355415223132e-06, "loss": 0.7099, "step": 7609 }, { "epoch": 0.49, "grad_norm": 1.795387047552808, "learning_rate": 5.4513231007579185e-06, "loss": 0.6614, "step": 7610 }, { "epoch": 0.49, "grad_norm": 1.0108527594325825, "learning_rate": 5.450290766895786e-06, "loss": 0.6766, "step": 7611 }, { "epoch": 0.49, "grad_norm": 1.4841254483595372, "learning_rate": 5.449258413681102e-06, "loss": 0.7142, "step": 7612 }, { "epoch": 0.49, "grad_norm": 1.0363142664477045, "learning_rate": 5.4482260411582365e-06, "loss": 0.5858, "step": 7613 }, { "epoch": 0.49, "grad_norm": 1.6358905786219127, "learning_rate": 5.447193649371556e-06, "loss": 0.7168, "step": 7614 }, { "epoch": 0.49, "grad_norm": 1.6943355902958217, "learning_rate": 5.446161238365433e-06, "loss": 0.6682, "step": 7615 }, { "epoch": 0.49, "grad_norm": 1.7019003608333554, "learning_rate": 5.4451288081842365e-06, "loss": 0.7455, "step": 7616 }, { "epoch": 0.49, "grad_norm": 1.5532309885247864, "learning_rate": 5.44409635887234e-06, "loss": 0.7005, "step": 7617 }, { "epoch": 0.49, "grad_norm": 1.4499491630343504, "learning_rate": 5.443063890474116e-06, "loss": 0.6807, "step": 7618 }, { "epoch": 0.49, "grad_norm": 1.6763472294146753, "learning_rate": 5.4420314030339375e-06, "loss": 0.649, "step": 7619 }, { "epoch": 0.49, "grad_norm": 1.5958589581040774, "learning_rate": 5.440998896596177e-06, "loss": 0.6908, "step": 7620 }, { "epoch": 0.49, "grad_norm": 1.5671078656552755, "learning_rate": 5.439966371205209e-06, "loss": 0.7615, "step": 7621 }, { "epoch": 0.49, "grad_norm": 1.566631817586497, "learning_rate": 5.438933826905412e-06, "loss": 0.6727, "step": 7622 }, { "epoch": 0.49, "grad_norm": 1.6771551395367021, "learning_rate": 5.437901263741163e-06, "loss": 0.7317, "step": 7623 }, { "epoch": 0.49, "grad_norm": 1.0885351361947906, "learning_rate": 5.436868681756837e-06, "loss": 0.6776, "step": 7624 }, { "epoch": 0.49, "grad_norm": 1.8636838579272594, "learning_rate": 5.435836080996812e-06, "loss": 0.8221, "step": 7625 }, { "epoch": 0.49, "grad_norm": 1.045858854311918, "learning_rate": 5.434803461505469e-06, "loss": 0.6474, "step": 7626 }, { "epoch": 0.49, "grad_norm": 1.5072767724907195, "learning_rate": 5.433770823327187e-06, "loss": 0.6663, "step": 7627 }, { "epoch": 0.49, "grad_norm": 1.7962040421333287, "learning_rate": 5.432738166506346e-06, "loss": 0.6905, "step": 7628 }, { "epoch": 0.49, "grad_norm": 1.4706796736457974, "learning_rate": 5.431705491087327e-06, "loss": 0.6866, "step": 7629 }, { "epoch": 0.49, "grad_norm": 1.3025425039927199, "learning_rate": 5.4306727971145145e-06, "loss": 0.6197, "step": 7630 }, { "epoch": 0.49, "grad_norm": 1.5120329653107814, "learning_rate": 5.42964008463229e-06, "loss": 0.6762, "step": 7631 }, { "epoch": 0.49, "grad_norm": 1.0395139877388255, "learning_rate": 5.428607353685038e-06, "loss": 0.6156, "step": 7632 }, { "epoch": 0.49, "grad_norm": 1.4623401499961401, "learning_rate": 5.427574604317142e-06, "loss": 0.7239, "step": 7633 }, { "epoch": 0.49, "grad_norm": 1.1186435471042973, "learning_rate": 5.426541836572988e-06, "loss": 0.7188, "step": 7634 }, { "epoch": 0.49, "grad_norm": 1.5531197982812597, "learning_rate": 5.4255090504969606e-06, "loss": 0.6897, "step": 7635 }, { "epoch": 0.49, "grad_norm": 1.229202437753175, "learning_rate": 5.42447624613345e-06, "loss": 0.7035, "step": 7636 }, { "epoch": 0.49, "grad_norm": 1.5304374628518307, "learning_rate": 5.423443423526842e-06, "loss": 0.6933, "step": 7637 }, { "epoch": 0.49, "grad_norm": 1.6910386116324383, "learning_rate": 5.422410582721526e-06, "loss": 0.6188, "step": 7638 }, { "epoch": 0.49, "grad_norm": 1.6016581917943584, "learning_rate": 5.42137772376189e-06, "loss": 0.709, "step": 7639 }, { "epoch": 0.49, "grad_norm": 1.3769309807112928, "learning_rate": 5.420344846692326e-06, "loss": 0.6776, "step": 7640 }, { "epoch": 0.49, "grad_norm": 1.354510696981314, "learning_rate": 5.419311951557222e-06, "loss": 0.7267, "step": 7641 }, { "epoch": 0.49, "grad_norm": 1.7602258578049557, "learning_rate": 5.418279038400973e-06, "loss": 0.7417, "step": 7642 }, { "epoch": 0.49, "grad_norm": 1.0916552661059127, "learning_rate": 5.417246107267968e-06, "loss": 0.6622, "step": 7643 }, { "epoch": 0.49, "grad_norm": 1.4763558952424694, "learning_rate": 5.416213158202602e-06, "loss": 0.6856, "step": 7644 }, { "epoch": 0.49, "grad_norm": 1.5538693922998426, "learning_rate": 5.415180191249271e-06, "loss": 0.6468, "step": 7645 }, { "epoch": 0.49, "grad_norm": 1.7276150934020855, "learning_rate": 5.414147206452367e-06, "loss": 0.7441, "step": 7646 }, { "epoch": 0.49, "grad_norm": 1.6207668003843332, "learning_rate": 5.413114203856287e-06, "loss": 0.7948, "step": 7647 }, { "epoch": 0.49, "grad_norm": 1.6250579367144782, "learning_rate": 5.4120811835054265e-06, "loss": 0.7651, "step": 7648 }, { "epoch": 0.49, "grad_norm": 1.7131551633219373, "learning_rate": 5.411048145444182e-06, "loss": 0.715, "step": 7649 }, { "epoch": 0.49, "grad_norm": 1.339705388225673, "learning_rate": 5.4100150897169514e-06, "loss": 0.656, "step": 7650 }, { "epoch": 0.49, "grad_norm": 1.363167680375009, "learning_rate": 5.408982016368134e-06, "loss": 0.6049, "step": 7651 }, { "epoch": 0.49, "grad_norm": 1.436602826579923, "learning_rate": 5.40794892544213e-06, "loss": 0.6494, "step": 7652 }, { "epoch": 0.49, "grad_norm": 1.7496184621417754, "learning_rate": 5.406915816983337e-06, "loss": 0.6682, "step": 7653 }, { "epoch": 0.49, "grad_norm": 1.6482898054726056, "learning_rate": 5.405882691036158e-06, "loss": 0.698, "step": 7654 }, { "epoch": 0.49, "grad_norm": 1.6690156259914954, "learning_rate": 5.404849547644993e-06, "loss": 0.585, "step": 7655 }, { "epoch": 0.49, "grad_norm": 1.5952630885434895, "learning_rate": 5.403816386854247e-06, "loss": 0.6928, "step": 7656 }, { "epoch": 0.49, "grad_norm": 1.4851214088255145, "learning_rate": 5.40278320870832e-06, "loss": 0.6911, "step": 7657 }, { "epoch": 0.49, "grad_norm": 1.4326702638580917, "learning_rate": 5.401750013251617e-06, "loss": 0.6255, "step": 7658 }, { "epoch": 0.49, "grad_norm": 1.5538176815671807, "learning_rate": 5.400716800528542e-06, "loss": 0.6599, "step": 7659 }, { "epoch": 0.49, "grad_norm": 1.555616066846143, "learning_rate": 5.399683570583503e-06, "loss": 0.6894, "step": 7660 }, { "epoch": 0.49, "grad_norm": 1.327758806673219, "learning_rate": 5.398650323460904e-06, "loss": 0.6787, "step": 7661 }, { "epoch": 0.49, "grad_norm": 1.4447263156871928, "learning_rate": 5.3976170592051505e-06, "loss": 0.5628, "step": 7662 }, { "epoch": 0.49, "grad_norm": 1.4588345203620225, "learning_rate": 5.39658377786065e-06, "loss": 0.6741, "step": 7663 }, { "epoch": 0.49, "grad_norm": 1.5578741289093465, "learning_rate": 5.3955504794718135e-06, "loss": 0.6424, "step": 7664 }, { "epoch": 0.49, "grad_norm": 1.5429128649059765, "learning_rate": 5.394517164083047e-06, "loss": 0.6313, "step": 7665 }, { "epoch": 0.49, "grad_norm": 1.5442614162198878, "learning_rate": 5.393483831738764e-06, "loss": 0.7335, "step": 7666 }, { "epoch": 0.49, "grad_norm": 2.0606148013155687, "learning_rate": 5.392450482483372e-06, "loss": 0.7906, "step": 7667 }, { "epoch": 0.49, "grad_norm": 1.489209795658086, "learning_rate": 5.391417116361284e-06, "loss": 0.6072, "step": 7668 }, { "epoch": 0.49, "grad_norm": 1.573627758104086, "learning_rate": 5.39038373341691e-06, "loss": 0.6908, "step": 7669 }, { "epoch": 0.49, "grad_norm": 1.6253848330679659, "learning_rate": 5.3893503336946625e-06, "loss": 0.6599, "step": 7670 }, { "epoch": 0.49, "grad_norm": 1.6617078074961695, "learning_rate": 5.388316917238956e-06, "loss": 0.8311, "step": 7671 }, { "epoch": 0.49, "grad_norm": 1.4965308403259676, "learning_rate": 5.3872834840942035e-06, "loss": 0.6658, "step": 7672 }, { "epoch": 0.49, "grad_norm": 1.4833885976596934, "learning_rate": 5.386250034304823e-06, "loss": 0.6916, "step": 7673 }, { "epoch": 0.49, "grad_norm": 1.1112848708212495, "learning_rate": 5.385216567915226e-06, "loss": 0.7391, "step": 7674 }, { "epoch": 0.49, "grad_norm": 1.6869954632518207, "learning_rate": 5.384183084969832e-06, "loss": 0.6772, "step": 7675 }, { "epoch": 0.49, "grad_norm": 1.4264979574439973, "learning_rate": 5.383149585513055e-06, "loss": 0.6402, "step": 7676 }, { "epoch": 0.49, "grad_norm": 1.6358638731116195, "learning_rate": 5.382116069589315e-06, "loss": 0.6563, "step": 7677 }, { "epoch": 0.49, "grad_norm": 1.4737321633434863, "learning_rate": 5.381082537243028e-06, "loss": 0.6655, "step": 7678 }, { "epoch": 0.49, "grad_norm": 1.842034341213449, "learning_rate": 5.380048988518613e-06, "loss": 0.7536, "step": 7679 }, { "epoch": 0.49, "grad_norm": 1.2870734714559808, "learning_rate": 5.379015423460494e-06, "loss": 0.7356, "step": 7680 }, { "epoch": 0.49, "grad_norm": 1.6028249784730864, "learning_rate": 5.3779818421130865e-06, "loss": 0.6839, "step": 7681 }, { "epoch": 0.49, "grad_norm": 1.6739736718947282, "learning_rate": 5.3769482445208145e-06, "loss": 0.6079, "step": 7682 }, { "epoch": 0.49, "grad_norm": 1.982343429390867, "learning_rate": 5.3759146307281e-06, "loss": 0.7127, "step": 7683 }, { "epoch": 0.49, "grad_norm": 1.5896313931199, "learning_rate": 5.374881000779364e-06, "loss": 0.6562, "step": 7684 }, { "epoch": 0.49, "grad_norm": 1.4144571800216843, "learning_rate": 5.37384735471903e-06, "loss": 0.6246, "step": 7685 }, { "epoch": 0.49, "grad_norm": 1.8832386511358814, "learning_rate": 5.372813692591521e-06, "loss": 0.7552, "step": 7686 }, { "epoch": 0.49, "grad_norm": 1.6096957846911246, "learning_rate": 5.371780014441265e-06, "loss": 0.706, "step": 7687 }, { "epoch": 0.49, "grad_norm": 1.4924984388701616, "learning_rate": 5.370746320312684e-06, "loss": 0.7234, "step": 7688 }, { "epoch": 0.49, "grad_norm": 1.5064136196210514, "learning_rate": 5.369712610250206e-06, "loss": 0.6359, "step": 7689 }, { "epoch": 0.49, "grad_norm": 1.5507512511392407, "learning_rate": 5.368678884298255e-06, "loss": 0.6099, "step": 7690 }, { "epoch": 0.49, "grad_norm": 1.5714542396853581, "learning_rate": 5.367645142501262e-06, "loss": 0.6152, "step": 7691 }, { "epoch": 0.49, "grad_norm": 1.5023165868832433, "learning_rate": 5.366611384903653e-06, "loss": 0.6039, "step": 7692 }, { "epoch": 0.49, "grad_norm": 1.5509267353303853, "learning_rate": 5.365577611549856e-06, "loss": 0.8159, "step": 7693 }, { "epoch": 0.49, "grad_norm": 1.57802718384539, "learning_rate": 5.364543822484303e-06, "loss": 0.7098, "step": 7694 }, { "epoch": 0.49, "grad_norm": 1.193514291354129, "learning_rate": 5.363510017751422e-06, "loss": 0.6275, "step": 7695 }, { "epoch": 0.49, "grad_norm": 1.9046544795940816, "learning_rate": 5.3624761973956456e-06, "loss": 0.7529, "step": 7696 }, { "epoch": 0.49, "grad_norm": 1.3993082640417331, "learning_rate": 5.3614423614614016e-06, "loss": 0.6014, "step": 7697 }, { "epoch": 0.49, "grad_norm": 1.4540549816200814, "learning_rate": 5.360408509993126e-06, "loss": 0.6182, "step": 7698 }, { "epoch": 0.49, "grad_norm": 1.6019662912188166, "learning_rate": 5.359374643035248e-06, "loss": 0.718, "step": 7699 }, { "epoch": 0.49, "grad_norm": 1.6601760420355116, "learning_rate": 5.358340760632205e-06, "loss": 0.6906, "step": 7700 }, { "epoch": 0.49, "grad_norm": 1.7277338232688633, "learning_rate": 5.357306862828427e-06, "loss": 0.6175, "step": 7701 }, { "epoch": 0.49, "grad_norm": 1.4166861112757716, "learning_rate": 5.356272949668353e-06, "loss": 0.6347, "step": 7702 }, { "epoch": 0.49, "grad_norm": 1.8248386385413193, "learning_rate": 5.355239021196416e-06, "loss": 0.7113, "step": 7703 }, { "epoch": 0.49, "grad_norm": 1.7230335363414708, "learning_rate": 5.3542050774570505e-06, "loss": 0.7191, "step": 7704 }, { "epoch": 0.49, "grad_norm": 1.1152427701176268, "learning_rate": 5.353171118494698e-06, "loss": 0.7978, "step": 7705 }, { "epoch": 0.49, "grad_norm": 1.0178321173654192, "learning_rate": 5.3521371443537904e-06, "loss": 0.6346, "step": 7706 }, { "epoch": 0.49, "grad_norm": 1.4292159147577808, "learning_rate": 5.351103155078768e-06, "loss": 0.662, "step": 7707 }, { "epoch": 0.49, "grad_norm": 1.4588039208720727, "learning_rate": 5.350069150714072e-06, "loss": 0.6359, "step": 7708 }, { "epoch": 0.49, "grad_norm": 1.449337636579231, "learning_rate": 5.349035131304138e-06, "loss": 0.6635, "step": 7709 }, { "epoch": 0.49, "grad_norm": 1.75704483789811, "learning_rate": 5.348001096893408e-06, "loss": 0.6971, "step": 7710 }, { "epoch": 0.49, "grad_norm": 1.5594144478163332, "learning_rate": 5.346967047526321e-06, "loss": 0.7333, "step": 7711 }, { "epoch": 0.49, "grad_norm": 1.147238440932646, "learning_rate": 5.345932983247322e-06, "loss": 0.5753, "step": 7712 }, { "epoch": 0.49, "grad_norm": 1.3344508912450834, "learning_rate": 5.344898904100848e-06, "loss": 0.6408, "step": 7713 }, { "epoch": 0.49, "grad_norm": 1.495906839433934, "learning_rate": 5.343864810131344e-06, "loss": 0.7233, "step": 7714 }, { "epoch": 0.49, "grad_norm": 1.6129464639145175, "learning_rate": 5.342830701383254e-06, "loss": 0.6041, "step": 7715 }, { "epoch": 0.49, "grad_norm": 1.407018025138565, "learning_rate": 5.34179657790102e-06, "loss": 0.6444, "step": 7716 }, { "epoch": 0.49, "grad_norm": 1.5501547024771478, "learning_rate": 5.340762439729088e-06, "loss": 0.6911, "step": 7717 }, { "epoch": 0.49, "grad_norm": 1.8400594963442587, "learning_rate": 5.3397282869119015e-06, "loss": 0.7399, "step": 7718 }, { "epoch": 0.49, "grad_norm": 1.34058576688152, "learning_rate": 5.338694119493908e-06, "loss": 0.5902, "step": 7719 }, { "epoch": 0.49, "grad_norm": 1.6687231239036808, "learning_rate": 5.337659937519553e-06, "loss": 0.6059, "step": 7720 }, { "epoch": 0.49, "grad_norm": 1.705151382591181, "learning_rate": 5.336625741033283e-06, "loss": 0.6858, "step": 7721 }, { "epoch": 0.49, "grad_norm": 1.7265272536402996, "learning_rate": 5.335591530079545e-06, "loss": 0.7387, "step": 7722 }, { "epoch": 0.49, "grad_norm": 1.6446265641369031, "learning_rate": 5.33455730470279e-06, "loss": 0.7007, "step": 7723 }, { "epoch": 0.49, "grad_norm": 1.1679618003381047, "learning_rate": 5.333523064947464e-06, "loss": 0.6757, "step": 7724 }, { "epoch": 0.49, "grad_norm": 1.6522611244733356, "learning_rate": 5.332488810858017e-06, "loss": 0.7032, "step": 7725 }, { "epoch": 0.49, "grad_norm": 1.5758150132333435, "learning_rate": 5.331454542478901e-06, "loss": 0.6884, "step": 7726 }, { "epoch": 0.49, "grad_norm": 1.040196934291353, "learning_rate": 5.330420259854564e-06, "loss": 0.6407, "step": 7727 }, { "epoch": 0.49, "grad_norm": 1.8396519904217612, "learning_rate": 5.32938596302946e-06, "loss": 0.7418, "step": 7728 }, { "epoch": 0.49, "grad_norm": 1.6251992201821868, "learning_rate": 5.328351652048037e-06, "loss": 0.7332, "step": 7729 }, { "epoch": 0.49, "grad_norm": 1.499425095701834, "learning_rate": 5.327317326954752e-06, "loss": 0.7233, "step": 7730 }, { "epoch": 0.49, "grad_norm": 1.0969520093356724, "learning_rate": 5.326282987794056e-06, "loss": 0.6505, "step": 7731 }, { "epoch": 0.49, "grad_norm": 1.783748079456221, "learning_rate": 5.325248634610403e-06, "loss": 0.7157, "step": 7732 }, { "epoch": 0.49, "grad_norm": 1.6801041079602446, "learning_rate": 5.3242142674482456e-06, "loss": 0.7747, "step": 7733 }, { "epoch": 0.5, "grad_norm": 1.5450358418904795, "learning_rate": 5.3231798863520415e-06, "loss": 0.6502, "step": 7734 }, { "epoch": 0.5, "grad_norm": 1.600646598199627, "learning_rate": 5.322145491366244e-06, "loss": 0.7071, "step": 7735 }, { "epoch": 0.5, "grad_norm": 1.454410359768025, "learning_rate": 5.32111108253531e-06, "loss": 0.7171, "step": 7736 }, { "epoch": 0.5, "grad_norm": 1.047142928293517, "learning_rate": 5.320076659903698e-06, "loss": 0.6106, "step": 7737 }, { "epoch": 0.5, "grad_norm": 1.5486146441731448, "learning_rate": 5.319042223515862e-06, "loss": 0.6366, "step": 7738 }, { "epoch": 0.5, "grad_norm": 1.5631730110885889, "learning_rate": 5.318007773416264e-06, "loss": 0.6654, "step": 7739 }, { "epoch": 0.5, "grad_norm": 1.5183476887796, "learning_rate": 5.316973309649358e-06, "loss": 0.7054, "step": 7740 }, { "epoch": 0.5, "grad_norm": 1.607922634195938, "learning_rate": 5.315938832259606e-06, "loss": 0.6597, "step": 7741 }, { "epoch": 0.5, "grad_norm": 1.4629997844375051, "learning_rate": 5.314904341291468e-06, "loss": 0.7173, "step": 7742 }, { "epoch": 0.5, "grad_norm": 1.4865767492996715, "learning_rate": 5.3138698367894e-06, "loss": 0.753, "step": 7743 }, { "epoch": 0.5, "grad_norm": 1.5933918368379432, "learning_rate": 5.312835318797868e-06, "loss": 0.5852, "step": 7744 }, { "epoch": 0.5, "grad_norm": 1.5539744308255217, "learning_rate": 5.311800787361332e-06, "loss": 0.7422, "step": 7745 }, { "epoch": 0.5, "grad_norm": 1.4819348322757502, "learning_rate": 5.310766242524253e-06, "loss": 0.673, "step": 7746 }, { "epoch": 0.5, "grad_norm": 1.7868895056117946, "learning_rate": 5.309731684331093e-06, "loss": 0.6746, "step": 7747 }, { "epoch": 0.5, "grad_norm": 1.4596063401781385, "learning_rate": 5.308697112826316e-06, "loss": 0.7339, "step": 7748 }, { "epoch": 0.5, "grad_norm": 1.10940991460237, "learning_rate": 5.307662528054387e-06, "loss": 0.6433, "step": 7749 }, { "epoch": 0.5, "grad_norm": 1.78104111982905, "learning_rate": 5.306627930059768e-06, "loss": 0.6713, "step": 7750 }, { "epoch": 0.5, "grad_norm": 1.6474156932910846, "learning_rate": 5.305593318886925e-06, "loss": 0.6776, "step": 7751 }, { "epoch": 0.5, "grad_norm": 1.083209340660356, "learning_rate": 5.304558694580324e-06, "loss": 0.6281, "step": 7752 }, { "epoch": 0.5, "grad_norm": 1.6920047131921623, "learning_rate": 5.303524057184429e-06, "loss": 0.7132, "step": 7753 }, { "epoch": 0.5, "grad_norm": 1.569338652260289, "learning_rate": 5.30248940674371e-06, "loss": 0.7731, "step": 7754 }, { "epoch": 0.5, "grad_norm": 1.4325470837902536, "learning_rate": 5.301454743302629e-06, "loss": 0.6137, "step": 7755 }, { "epoch": 0.5, "grad_norm": 1.3919307629144788, "learning_rate": 5.3004200669056585e-06, "loss": 0.6957, "step": 7756 }, { "epoch": 0.5, "grad_norm": 1.3681119122934577, "learning_rate": 5.299385377597265e-06, "loss": 0.6982, "step": 7757 }, { "epoch": 0.5, "grad_norm": 1.4966351432477663, "learning_rate": 5.298350675421915e-06, "loss": 0.6736, "step": 7758 }, { "epoch": 0.5, "grad_norm": 2.0739110338109334, "learning_rate": 5.297315960424081e-06, "loss": 0.6964, "step": 7759 }, { "epoch": 0.5, "grad_norm": 1.5160364045613572, "learning_rate": 5.296281232648233e-06, "loss": 0.6419, "step": 7760 }, { "epoch": 0.5, "grad_norm": 1.4634235321429419, "learning_rate": 5.295246492138839e-06, "loss": 0.6668, "step": 7761 }, { "epoch": 0.5, "grad_norm": 1.362908839963184, "learning_rate": 5.294211738940373e-06, "loss": 0.6261, "step": 7762 }, { "epoch": 0.5, "grad_norm": 1.3669189099247614, "learning_rate": 5.293176973097303e-06, "loss": 0.5863, "step": 7763 }, { "epoch": 0.5, "grad_norm": 1.5229830804918516, "learning_rate": 5.292142194654102e-06, "loss": 0.6921, "step": 7764 }, { "epoch": 0.5, "grad_norm": 1.5033827100547041, "learning_rate": 5.2911074036552426e-06, "loss": 0.7162, "step": 7765 }, { "epoch": 0.5, "grad_norm": 1.33750953742553, "learning_rate": 5.290072600145201e-06, "loss": 0.5987, "step": 7766 }, { "epoch": 0.5, "grad_norm": 1.89910828829929, "learning_rate": 5.289037784168448e-06, "loss": 0.7421, "step": 7767 }, { "epoch": 0.5, "grad_norm": 1.549662924402785, "learning_rate": 5.288002955769458e-06, "loss": 0.6024, "step": 7768 }, { "epoch": 0.5, "grad_norm": 2.543611265333428, "learning_rate": 5.286968114992706e-06, "loss": 0.6339, "step": 7769 }, { "epoch": 0.5, "grad_norm": 1.7620851651236151, "learning_rate": 5.285933261882668e-06, "loss": 0.658, "step": 7770 }, { "epoch": 0.5, "grad_norm": 1.4886646350453059, "learning_rate": 5.28489839648382e-06, "loss": 0.6375, "step": 7771 }, { "epoch": 0.5, "grad_norm": 1.7119458443210616, "learning_rate": 5.2838635188406365e-06, "loss": 0.7375, "step": 7772 }, { "epoch": 0.5, "grad_norm": 1.090026023859756, "learning_rate": 5.282828628997595e-06, "loss": 0.5384, "step": 7773 }, { "epoch": 0.5, "grad_norm": 1.5259332777032335, "learning_rate": 5.281793726999174e-06, "loss": 0.6048, "step": 7774 }, { "epoch": 0.5, "grad_norm": 1.446582639383751, "learning_rate": 5.28075881288985e-06, "loss": 0.6404, "step": 7775 }, { "epoch": 0.5, "grad_norm": 2.189754811069585, "learning_rate": 5.279723886714103e-06, "loss": 0.6626, "step": 7776 }, { "epoch": 0.5, "grad_norm": 1.5648087336079128, "learning_rate": 5.2786889485164115e-06, "loss": 0.6629, "step": 7777 }, { "epoch": 0.5, "grad_norm": 1.5971230598866701, "learning_rate": 5.277653998341255e-06, "loss": 0.7172, "step": 7778 }, { "epoch": 0.5, "grad_norm": 1.4647572808110987, "learning_rate": 5.276619036233111e-06, "loss": 0.6832, "step": 7779 }, { "epoch": 0.5, "grad_norm": 1.559230751785381, "learning_rate": 5.275584062236463e-06, "loss": 0.6834, "step": 7780 }, { "epoch": 0.5, "grad_norm": 1.0094148227793058, "learning_rate": 5.274549076395792e-06, "loss": 0.6615, "step": 7781 }, { "epoch": 0.5, "grad_norm": 1.5154379603779584, "learning_rate": 5.273514078755577e-06, "loss": 0.7312, "step": 7782 }, { "epoch": 0.5, "grad_norm": 1.5422250661988326, "learning_rate": 5.2724790693603025e-06, "loss": 0.668, "step": 7783 }, { "epoch": 0.5, "grad_norm": 1.5680600477651718, "learning_rate": 5.27144404825445e-06, "loss": 0.647, "step": 7784 }, { "epoch": 0.5, "grad_norm": 1.8409158630122584, "learning_rate": 5.270409015482504e-06, "loss": 0.6851, "step": 7785 }, { "epoch": 0.5, "grad_norm": 1.5133556723917247, "learning_rate": 5.269373971088943e-06, "loss": 0.6918, "step": 7786 }, { "epoch": 0.5, "grad_norm": 1.1484203633601529, "learning_rate": 5.268338915118258e-06, "loss": 0.7033, "step": 7787 }, { "epoch": 0.5, "grad_norm": 1.4304816687408837, "learning_rate": 5.267303847614931e-06, "loss": 0.714, "step": 7788 }, { "epoch": 0.5, "grad_norm": 1.933608537055529, "learning_rate": 5.266268768623445e-06, "loss": 0.6619, "step": 7789 }, { "epoch": 0.5, "grad_norm": 1.6448900914458526, "learning_rate": 5.2652336781882865e-06, "loss": 0.6828, "step": 7790 }, { "epoch": 0.5, "grad_norm": 1.6433028488555368, "learning_rate": 5.264198576353942e-06, "loss": 0.6413, "step": 7791 }, { "epoch": 0.5, "grad_norm": 1.157836406329468, "learning_rate": 5.263163463164898e-06, "loss": 0.6098, "step": 7792 }, { "epoch": 0.5, "grad_norm": 1.4597862527469447, "learning_rate": 5.262128338665641e-06, "loss": 0.6398, "step": 7793 }, { "epoch": 0.5, "grad_norm": 1.6078891645487365, "learning_rate": 5.261093202900659e-06, "loss": 0.6139, "step": 7794 }, { "epoch": 0.5, "grad_norm": 1.4668955910526633, "learning_rate": 5.26005805591444e-06, "loss": 0.5649, "step": 7795 }, { "epoch": 0.5, "grad_norm": 1.6133846987479943, "learning_rate": 5.259022897751473e-06, "loss": 0.7184, "step": 7796 }, { "epoch": 0.5, "grad_norm": 1.6247216697967224, "learning_rate": 5.257987728456244e-06, "loss": 0.6992, "step": 7797 }, { "epoch": 0.5, "grad_norm": 1.831729389536679, "learning_rate": 5.256952548073246e-06, "loss": 0.7574, "step": 7798 }, { "epoch": 0.5, "grad_norm": 1.6028212922395013, "learning_rate": 5.255917356646968e-06, "loss": 0.725, "step": 7799 }, { "epoch": 0.5, "grad_norm": 1.677678551235122, "learning_rate": 5.254882154221898e-06, "loss": 0.647, "step": 7800 }, { "epoch": 0.5, "grad_norm": 1.0691308708041736, "learning_rate": 5.2538469408425284e-06, "loss": 0.7202, "step": 7801 }, { "epoch": 0.5, "grad_norm": 1.7667361242994288, "learning_rate": 5.252811716553352e-06, "loss": 0.7071, "step": 7802 }, { "epoch": 0.5, "grad_norm": 1.4697785443143134, "learning_rate": 5.25177648139886e-06, "loss": 0.6007, "step": 7803 }, { "epoch": 0.5, "grad_norm": 1.4263293005210986, "learning_rate": 5.250741235423543e-06, "loss": 0.6688, "step": 7804 }, { "epoch": 0.5, "grad_norm": 1.4712483480981546, "learning_rate": 5.2497059786718955e-06, "loss": 0.67, "step": 7805 }, { "epoch": 0.5, "grad_norm": 1.5024329714779032, "learning_rate": 5.2486707111884085e-06, "loss": 0.7094, "step": 7806 }, { "epoch": 0.5, "grad_norm": 1.4900917064265249, "learning_rate": 5.247635433017576e-06, "loss": 0.6867, "step": 7807 }, { "epoch": 0.5, "grad_norm": 1.6890191007867836, "learning_rate": 5.246600144203895e-06, "loss": 0.6678, "step": 7808 }, { "epoch": 0.5, "grad_norm": 1.6944437864753263, "learning_rate": 5.245564844791857e-06, "loss": 0.7618, "step": 7809 }, { "epoch": 0.5, "grad_norm": 1.4283551150335558, "learning_rate": 5.244529534825958e-06, "loss": 0.551, "step": 7810 }, { "epoch": 0.5, "grad_norm": 1.5435677791244238, "learning_rate": 5.243494214350693e-06, "loss": 0.6635, "step": 7811 }, { "epoch": 0.5, "grad_norm": 1.625061715893422, "learning_rate": 5.242458883410558e-06, "loss": 0.6006, "step": 7812 }, { "epoch": 0.5, "grad_norm": 1.45440089368667, "learning_rate": 5.2414235420500516e-06, "loss": 0.6324, "step": 7813 }, { "epoch": 0.5, "grad_norm": 1.4272187892534656, "learning_rate": 5.240388190313668e-06, "loss": 0.7578, "step": 7814 }, { "epoch": 0.5, "grad_norm": 1.4657446067271769, "learning_rate": 5.2393528282459036e-06, "loss": 0.6519, "step": 7815 }, { "epoch": 0.5, "grad_norm": 1.542901535084005, "learning_rate": 5.238317455891259e-06, "loss": 0.7187, "step": 7816 }, { "epoch": 0.5, "grad_norm": 2.0480741980910104, "learning_rate": 5.237282073294231e-06, "loss": 0.7183, "step": 7817 }, { "epoch": 0.5, "grad_norm": 1.6261698484677884, "learning_rate": 5.236246680499317e-06, "loss": 0.7434, "step": 7818 }, { "epoch": 0.5, "grad_norm": 1.7356107869652793, "learning_rate": 5.235211277551019e-06, "loss": 0.6658, "step": 7819 }, { "epoch": 0.5, "grad_norm": 1.5921542535555508, "learning_rate": 5.234175864493832e-06, "loss": 0.7283, "step": 7820 }, { "epoch": 0.5, "grad_norm": 1.3641554338609532, "learning_rate": 5.2331404413722595e-06, "loss": 0.6535, "step": 7821 }, { "epoch": 0.5, "grad_norm": 1.7472993850781064, "learning_rate": 5.2321050082308e-06, "loss": 0.7334, "step": 7822 }, { "epoch": 0.5, "grad_norm": 1.561138130875792, "learning_rate": 5.231069565113957e-06, "loss": 0.6192, "step": 7823 }, { "epoch": 0.5, "grad_norm": 1.0521831313110923, "learning_rate": 5.230034112066228e-06, "loss": 0.5635, "step": 7824 }, { "epoch": 0.5, "grad_norm": 1.4736765135765137, "learning_rate": 5.2289986491321176e-06, "loss": 0.7236, "step": 7825 }, { "epoch": 0.5, "grad_norm": 1.5320925421094924, "learning_rate": 5.227963176356126e-06, "loss": 0.7214, "step": 7826 }, { "epoch": 0.5, "grad_norm": 1.0792725136090005, "learning_rate": 5.226927693782755e-06, "loss": 0.6383, "step": 7827 }, { "epoch": 0.5, "grad_norm": 1.9860982608586342, "learning_rate": 5.225892201456509e-06, "loss": 0.6868, "step": 7828 }, { "epoch": 0.5, "grad_norm": 1.786151544618506, "learning_rate": 5.22485669942189e-06, "loss": 0.6758, "step": 7829 }, { "epoch": 0.5, "grad_norm": 1.5062014155634138, "learning_rate": 5.223821187723403e-06, "loss": 0.6261, "step": 7830 }, { "epoch": 0.5, "grad_norm": 1.0971164152936521, "learning_rate": 5.222785666405553e-06, "loss": 0.6146, "step": 7831 }, { "epoch": 0.5, "grad_norm": 1.4401000894697658, "learning_rate": 5.221750135512843e-06, "loss": 0.6621, "step": 7832 }, { "epoch": 0.5, "grad_norm": 1.4981068001892084, "learning_rate": 5.220714595089777e-06, "loss": 0.6916, "step": 7833 }, { "epoch": 0.5, "grad_norm": 1.9150880007077913, "learning_rate": 5.219679045180863e-06, "loss": 0.7168, "step": 7834 }, { "epoch": 0.5, "grad_norm": 1.511678804448821, "learning_rate": 5.218643485830604e-06, "loss": 0.5352, "step": 7835 }, { "epoch": 0.5, "grad_norm": 1.4082830578298349, "learning_rate": 5.217607917083508e-06, "loss": 0.5964, "step": 7836 }, { "epoch": 0.5, "grad_norm": 1.6407255037992925, "learning_rate": 5.216572338984081e-06, "loss": 0.707, "step": 7837 }, { "epoch": 0.5, "grad_norm": 1.6271848363620258, "learning_rate": 5.215536751576829e-06, "loss": 0.6811, "step": 7838 }, { "epoch": 0.5, "grad_norm": 1.5274372710518416, "learning_rate": 5.21450115490626e-06, "loss": 0.7162, "step": 7839 }, { "epoch": 0.5, "grad_norm": 1.7147745495032831, "learning_rate": 5.213465549016885e-06, "loss": 0.6888, "step": 7840 }, { "epoch": 0.5, "grad_norm": 1.519402176347185, "learning_rate": 5.212429933953207e-06, "loss": 0.7125, "step": 7841 }, { "epoch": 0.5, "grad_norm": 1.3366727173982327, "learning_rate": 5.211394309759737e-06, "loss": 0.6286, "step": 7842 }, { "epoch": 0.5, "grad_norm": 1.6819197180395233, "learning_rate": 5.210358676480983e-06, "loss": 0.6734, "step": 7843 }, { "epoch": 0.5, "grad_norm": 1.4322475532522772, "learning_rate": 5.209323034161458e-06, "loss": 0.6678, "step": 7844 }, { "epoch": 0.5, "grad_norm": 1.5747758973393546, "learning_rate": 5.208287382845666e-06, "loss": 0.663, "step": 7845 }, { "epoch": 0.5, "grad_norm": 1.877818159786664, "learning_rate": 5.207251722578121e-06, "loss": 0.6654, "step": 7846 }, { "epoch": 0.5, "grad_norm": 1.7713340164561042, "learning_rate": 5.206216053403333e-06, "loss": 0.656, "step": 7847 }, { "epoch": 0.5, "grad_norm": 1.43583548184844, "learning_rate": 5.20518037536581e-06, "loss": 0.6948, "step": 7848 }, { "epoch": 0.5, "grad_norm": 0.9845524325627959, "learning_rate": 5.204144688510069e-06, "loss": 0.6442, "step": 7849 }, { "epoch": 0.5, "grad_norm": 2.4700035315252955, "learning_rate": 5.203108992880616e-06, "loss": 0.6758, "step": 7850 }, { "epoch": 0.5, "grad_norm": 1.5828095019420056, "learning_rate": 5.202073288521965e-06, "loss": 0.7396, "step": 7851 }, { "epoch": 0.5, "grad_norm": 1.439289909218122, "learning_rate": 5.20103757547863e-06, "loss": 0.7062, "step": 7852 }, { "epoch": 0.5, "grad_norm": 1.4701764668641484, "learning_rate": 5.2000018537951226e-06, "loss": 0.6302, "step": 7853 }, { "epoch": 0.5, "grad_norm": 1.5541188392678702, "learning_rate": 5.198966123515955e-06, "loss": 0.73, "step": 7854 }, { "epoch": 0.5, "grad_norm": 1.6349922037776343, "learning_rate": 5.197930384685642e-06, "loss": 0.6178, "step": 7855 }, { "epoch": 0.5, "grad_norm": 1.6696242122804685, "learning_rate": 5.1968946373486964e-06, "loss": 0.7502, "step": 7856 }, { "epoch": 0.5, "grad_norm": 1.7012501136473877, "learning_rate": 5.195858881549633e-06, "loss": 0.7464, "step": 7857 }, { "epoch": 0.5, "grad_norm": 1.4266509536170997, "learning_rate": 5.194823117332966e-06, "loss": 0.6556, "step": 7858 }, { "epoch": 0.5, "grad_norm": 1.1719110225572738, "learning_rate": 5.19378734474321e-06, "loss": 0.7404, "step": 7859 }, { "epoch": 0.5, "grad_norm": 1.493892717345176, "learning_rate": 5.192751563824884e-06, "loss": 0.6793, "step": 7860 }, { "epoch": 0.5, "grad_norm": 1.583407203606205, "learning_rate": 5.1917157746225e-06, "loss": 0.7054, "step": 7861 }, { "epoch": 0.5, "grad_norm": 1.4659768945620801, "learning_rate": 5.190679977180574e-06, "loss": 0.5313, "step": 7862 }, { "epoch": 0.5, "grad_norm": 1.3134838042750836, "learning_rate": 5.189644171543624e-06, "loss": 0.67, "step": 7863 }, { "epoch": 0.5, "grad_norm": 1.6211945512363157, "learning_rate": 5.188608357756164e-06, "loss": 0.6901, "step": 7864 }, { "epoch": 0.5, "grad_norm": 1.5966285964056253, "learning_rate": 5.187572535862715e-06, "loss": 0.7771, "step": 7865 }, { "epoch": 0.5, "grad_norm": 1.396570784178305, "learning_rate": 5.186536705907792e-06, "loss": 0.684, "step": 7866 }, { "epoch": 0.5, "grad_norm": 1.7882652357563869, "learning_rate": 5.185500867935913e-06, "loss": 0.6873, "step": 7867 }, { "epoch": 0.5, "grad_norm": 1.5445413972205182, "learning_rate": 5.184465021991597e-06, "loss": 0.6244, "step": 7868 }, { "epoch": 0.5, "grad_norm": 1.5888485607953315, "learning_rate": 5.183429168119362e-06, "loss": 0.595, "step": 7869 }, { "epoch": 0.5, "grad_norm": 1.5893141150356158, "learning_rate": 5.182393306363728e-06, "loss": 0.7496, "step": 7870 }, { "epoch": 0.5, "grad_norm": 1.24993825565779, "learning_rate": 5.181357436769211e-06, "loss": 0.6084, "step": 7871 }, { "epoch": 0.5, "grad_norm": 1.6006277234811728, "learning_rate": 5.180321559380332e-06, "loss": 0.8049, "step": 7872 }, { "epoch": 0.5, "grad_norm": 1.5613268936618738, "learning_rate": 5.1792856742416145e-06, "loss": 0.6412, "step": 7873 }, { "epoch": 0.5, "grad_norm": 2.05039001410489, "learning_rate": 5.178249781397573e-06, "loss": 0.6786, "step": 7874 }, { "epoch": 0.5, "grad_norm": 1.5027831451381437, "learning_rate": 5.17721388089273e-06, "loss": 0.6975, "step": 7875 }, { "epoch": 0.5, "grad_norm": 1.5746522767197562, "learning_rate": 5.176177972771608e-06, "loss": 0.6943, "step": 7876 }, { "epoch": 0.5, "grad_norm": 1.5297423611463594, "learning_rate": 5.175142057078727e-06, "loss": 0.7184, "step": 7877 }, { "epoch": 0.5, "grad_norm": 1.6069673273264644, "learning_rate": 5.174106133858607e-06, "loss": 0.6081, "step": 7878 }, { "epoch": 0.5, "grad_norm": 1.5441310123275518, "learning_rate": 5.173070203155772e-06, "loss": 0.6853, "step": 7879 }, { "epoch": 0.5, "grad_norm": 1.5372577217252061, "learning_rate": 5.172034265014743e-06, "loss": 0.6688, "step": 7880 }, { "epoch": 0.5, "grad_norm": 1.6795178636594583, "learning_rate": 5.170998319480044e-06, "loss": 0.6294, "step": 7881 }, { "epoch": 0.5, "grad_norm": 1.576013744700416, "learning_rate": 5.1699623665961965e-06, "loss": 0.6579, "step": 7882 }, { "epoch": 0.5, "grad_norm": 1.6147138600343371, "learning_rate": 5.168926406407723e-06, "loss": 0.7314, "step": 7883 }, { "epoch": 0.5, "grad_norm": 1.6136360258835456, "learning_rate": 5.1678904389591474e-06, "loss": 0.6392, "step": 7884 }, { "epoch": 0.5, "grad_norm": 1.6453746876731177, "learning_rate": 5.166854464294993e-06, "loss": 0.7204, "step": 7885 }, { "epoch": 0.5, "grad_norm": 1.4589692064840585, "learning_rate": 5.165818482459784e-06, "loss": 0.6325, "step": 7886 }, { "epoch": 0.5, "grad_norm": 1.708204375962308, "learning_rate": 5.164782493498046e-06, "loss": 0.7005, "step": 7887 }, { "epoch": 0.5, "grad_norm": 1.541794447238433, "learning_rate": 5.163746497454304e-06, "loss": 0.7054, "step": 7888 }, { "epoch": 0.5, "grad_norm": 1.5117886527746356, "learning_rate": 5.16271049437308e-06, "loss": 0.611, "step": 7889 }, { "epoch": 0.51, "grad_norm": 1.565661734367574, "learning_rate": 5.161674484298902e-06, "loss": 0.5991, "step": 7890 }, { "epoch": 0.51, "grad_norm": 1.5438190816371342, "learning_rate": 5.160638467276293e-06, "loss": 0.7064, "step": 7891 }, { "epoch": 0.51, "grad_norm": 1.4328345436971526, "learning_rate": 5.159602443349781e-06, "loss": 0.6848, "step": 7892 }, { "epoch": 0.51, "grad_norm": 1.0896237242694586, "learning_rate": 5.15856641256389e-06, "loss": 0.5503, "step": 7893 }, { "epoch": 0.51, "grad_norm": 1.4293087666029833, "learning_rate": 5.157530374963149e-06, "loss": 0.6993, "step": 7894 }, { "epoch": 0.51, "grad_norm": 1.5646450460727581, "learning_rate": 5.156494330592084e-06, "loss": 0.7278, "step": 7895 }, { "epoch": 0.51, "grad_norm": 1.5191305342529366, "learning_rate": 5.15545827949522e-06, "loss": 0.7472, "step": 7896 }, { "epoch": 0.51, "grad_norm": 1.6389847462338891, "learning_rate": 5.154422221717087e-06, "loss": 0.665, "step": 7897 }, { "epoch": 0.51, "grad_norm": 1.6353478546385707, "learning_rate": 5.153386157302211e-06, "loss": 0.5917, "step": 7898 }, { "epoch": 0.51, "grad_norm": 1.1803875723074895, "learning_rate": 5.152350086295121e-06, "loss": 0.6301, "step": 7899 }, { "epoch": 0.51, "grad_norm": 1.6412169782915464, "learning_rate": 5.151314008740343e-06, "loss": 0.6993, "step": 7900 }, { "epoch": 0.51, "grad_norm": 1.872306571973257, "learning_rate": 5.150277924682408e-06, "loss": 0.7493, "step": 7901 }, { "epoch": 0.51, "grad_norm": 1.6650115439632152, "learning_rate": 5.149241834165844e-06, "loss": 0.7095, "step": 7902 }, { "epoch": 0.51, "grad_norm": 1.518233306975616, "learning_rate": 5.148205737235178e-06, "loss": 0.5564, "step": 7903 }, { "epoch": 0.51, "grad_norm": 1.7810888958597553, "learning_rate": 5.147169633934942e-06, "loss": 0.579, "step": 7904 }, { "epoch": 0.51, "grad_norm": 1.5120015941778788, "learning_rate": 5.146133524309664e-06, "loss": 0.6539, "step": 7905 }, { "epoch": 0.51, "grad_norm": 2.1103710888545777, "learning_rate": 5.145097408403874e-06, "loss": 0.7917, "step": 7906 }, { "epoch": 0.51, "grad_norm": 5.7497827382786015, "learning_rate": 5.144061286262102e-06, "loss": 0.6628, "step": 7907 }, { "epoch": 0.51, "grad_norm": 3.4025980300995693, "learning_rate": 5.1430251579288794e-06, "loss": 0.7029, "step": 7908 }, { "epoch": 0.51, "grad_norm": 2.615457871192254, "learning_rate": 5.141989023448736e-06, "loss": 0.6505, "step": 7909 }, { "epoch": 0.51, "grad_norm": 1.0834733760939743, "learning_rate": 5.1409528828662025e-06, "loss": 0.5942, "step": 7910 }, { "epoch": 0.51, "grad_norm": 1.516453030403703, "learning_rate": 5.139916736225811e-06, "loss": 0.6398, "step": 7911 }, { "epoch": 0.51, "grad_norm": 1.4930405557154485, "learning_rate": 5.13888058357209e-06, "loss": 0.6642, "step": 7912 }, { "epoch": 0.51, "grad_norm": 1.4613170461736078, "learning_rate": 5.137844424949576e-06, "loss": 0.6792, "step": 7913 }, { "epoch": 0.51, "grad_norm": 1.502359818717975, "learning_rate": 5.136808260402797e-06, "loss": 0.6859, "step": 7914 }, { "epoch": 0.51, "grad_norm": 1.3748380657851877, "learning_rate": 5.135772089976285e-06, "loss": 0.6786, "step": 7915 }, { "epoch": 0.51, "grad_norm": 1.653621702858384, "learning_rate": 5.134735913714577e-06, "loss": 0.6665, "step": 7916 }, { "epoch": 0.51, "grad_norm": 1.589818191612316, "learning_rate": 5.133699731662201e-06, "loss": 0.7523, "step": 7917 }, { "epoch": 0.51, "grad_norm": 1.5165497136612784, "learning_rate": 5.132663543863692e-06, "loss": 0.6458, "step": 7918 }, { "epoch": 0.51, "grad_norm": 2.402738651012152, "learning_rate": 5.131627350363582e-06, "loss": 0.5999, "step": 7919 }, { "epoch": 0.51, "grad_norm": 1.2401922083534027, "learning_rate": 5.130591151206405e-06, "loss": 0.6338, "step": 7920 }, { "epoch": 0.51, "grad_norm": 1.4363053974707654, "learning_rate": 5.1295549464366944e-06, "loss": 0.6815, "step": 7921 }, { "epoch": 0.51, "grad_norm": 1.8130061818695764, "learning_rate": 5.128518736098984e-06, "loss": 0.634, "step": 7922 }, { "epoch": 0.51, "grad_norm": 1.469454458990408, "learning_rate": 5.1274825202378085e-06, "loss": 0.6986, "step": 7923 }, { "epoch": 0.51, "grad_norm": 1.4676589077043962, "learning_rate": 5.126446298897704e-06, "loss": 0.6511, "step": 7924 }, { "epoch": 0.51, "grad_norm": 1.3977727440233247, "learning_rate": 5.125410072123203e-06, "loss": 0.6549, "step": 7925 }, { "epoch": 0.51, "grad_norm": 1.7430567146546734, "learning_rate": 5.12437383995884e-06, "loss": 0.6486, "step": 7926 }, { "epoch": 0.51, "grad_norm": 1.1431853499729712, "learning_rate": 5.12333760244915e-06, "loss": 0.5824, "step": 7927 }, { "epoch": 0.51, "grad_norm": 1.574891125985903, "learning_rate": 5.122301359638669e-06, "loss": 0.6817, "step": 7928 }, { "epoch": 0.51, "grad_norm": 1.4502884851181255, "learning_rate": 5.121265111571933e-06, "loss": 0.709, "step": 7929 }, { "epoch": 0.51, "grad_norm": 1.6624044879628508, "learning_rate": 5.120228858293477e-06, "loss": 0.7025, "step": 7930 }, { "epoch": 0.51, "grad_norm": 3.139929818250216, "learning_rate": 5.119192599847838e-06, "loss": 0.7212, "step": 7931 }, { "epoch": 0.51, "grad_norm": 1.2954337290422029, "learning_rate": 5.118156336279552e-06, "loss": 0.6133, "step": 7932 }, { "epoch": 0.51, "grad_norm": 1.664753722633929, "learning_rate": 5.1171200676331535e-06, "loss": 0.6793, "step": 7933 }, { "epoch": 0.51, "grad_norm": 1.4322750401443345, "learning_rate": 5.116083793953181e-06, "loss": 0.6899, "step": 7934 }, { "epoch": 0.51, "grad_norm": 1.8824813964120366, "learning_rate": 5.115047515284173e-06, "loss": 0.7538, "step": 7935 }, { "epoch": 0.51, "grad_norm": 1.624704833511749, "learning_rate": 5.1140112316706615e-06, "loss": 0.6089, "step": 7936 }, { "epoch": 0.51, "grad_norm": 1.6691616078252425, "learning_rate": 5.112974943157188e-06, "loss": 0.7369, "step": 7937 }, { "epoch": 0.51, "grad_norm": 1.469288685726049, "learning_rate": 5.1119386497882896e-06, "loss": 0.6754, "step": 7938 }, { "epoch": 0.51, "grad_norm": 1.4386090907159534, "learning_rate": 5.110902351608504e-06, "loss": 0.6255, "step": 7939 }, { "epoch": 0.51, "grad_norm": 1.548672293890575, "learning_rate": 5.109866048662365e-06, "loss": 0.7712, "step": 7940 }, { "epoch": 0.51, "grad_norm": 2.181801928895872, "learning_rate": 5.108829740994417e-06, "loss": 0.7453, "step": 7941 }, { "epoch": 0.51, "grad_norm": 1.6131423232704474, "learning_rate": 5.107793428649194e-06, "loss": 0.692, "step": 7942 }, { "epoch": 0.51, "grad_norm": 1.61832637453446, "learning_rate": 5.106757111671235e-06, "loss": 0.7468, "step": 7943 }, { "epoch": 0.51, "grad_norm": 1.8006258666882289, "learning_rate": 5.105720790105082e-06, "loss": 0.8488, "step": 7944 }, { "epoch": 0.51, "grad_norm": 1.9063285623383404, "learning_rate": 5.104684463995271e-06, "loss": 0.81, "step": 7945 }, { "epoch": 0.51, "grad_norm": 1.0495766066862806, "learning_rate": 5.103648133386342e-06, "loss": 0.6556, "step": 7946 }, { "epoch": 0.51, "grad_norm": 1.7979393839666875, "learning_rate": 5.102611798322833e-06, "loss": 0.7446, "step": 7947 }, { "epoch": 0.51, "grad_norm": 1.1919424447422786, "learning_rate": 5.101575458849285e-06, "loss": 0.6408, "step": 7948 }, { "epoch": 0.51, "grad_norm": 1.4446587131908617, "learning_rate": 5.100539115010237e-06, "loss": 0.745, "step": 7949 }, { "epoch": 0.51, "grad_norm": 1.5381873191168514, "learning_rate": 5.09950276685023e-06, "loss": 0.6119, "step": 7950 }, { "epoch": 0.51, "grad_norm": 1.1899116602523716, "learning_rate": 5.098466414413801e-06, "loss": 0.7048, "step": 7951 }, { "epoch": 0.51, "grad_norm": 1.3263580187978428, "learning_rate": 5.097430057745494e-06, "loss": 0.7661, "step": 7952 }, { "epoch": 0.51, "grad_norm": 1.641722733026959, "learning_rate": 5.096393696889848e-06, "loss": 0.6655, "step": 7953 }, { "epoch": 0.51, "grad_norm": 1.4606224899451388, "learning_rate": 5.095357331891403e-06, "loss": 0.7027, "step": 7954 }, { "epoch": 0.51, "grad_norm": 1.4925068634449035, "learning_rate": 5.094320962794701e-06, "loss": 0.6866, "step": 7955 }, { "epoch": 0.51, "grad_norm": 1.4630189325166947, "learning_rate": 5.093284589644282e-06, "loss": 0.6197, "step": 7956 }, { "epoch": 0.51, "grad_norm": 1.4822713905746634, "learning_rate": 5.092248212484686e-06, "loss": 0.6, "step": 7957 }, { "epoch": 0.51, "grad_norm": 1.5642698367064234, "learning_rate": 5.0912118313604564e-06, "loss": 0.7274, "step": 7958 }, { "epoch": 0.51, "grad_norm": 1.4582653495857836, "learning_rate": 5.090175446316133e-06, "loss": 0.7125, "step": 7959 }, { "epoch": 0.51, "grad_norm": 1.5445842434540866, "learning_rate": 5.0891390573962605e-06, "loss": 0.7829, "step": 7960 }, { "epoch": 0.51, "grad_norm": 1.5485508402940906, "learning_rate": 5.088102664645378e-06, "loss": 0.6913, "step": 7961 }, { "epoch": 0.51, "grad_norm": 1.2849662088351141, "learning_rate": 5.087066268108027e-06, "loss": 0.7326, "step": 7962 }, { "epoch": 0.51, "grad_norm": 1.563404526375281, "learning_rate": 5.086029867828752e-06, "loss": 0.6797, "step": 7963 }, { "epoch": 0.51, "grad_norm": 1.667452555910237, "learning_rate": 5.084993463852094e-06, "loss": 0.6536, "step": 7964 }, { "epoch": 0.51, "grad_norm": 1.6572746363020612, "learning_rate": 5.083957056222595e-06, "loss": 0.7598, "step": 7965 }, { "epoch": 0.51, "grad_norm": 1.5850941251896873, "learning_rate": 5.082920644984798e-06, "loss": 0.6137, "step": 7966 }, { "epoch": 0.51, "grad_norm": 1.7186033812437083, "learning_rate": 5.081884230183247e-06, "loss": 0.6886, "step": 7967 }, { "epoch": 0.51, "grad_norm": 1.674369963503513, "learning_rate": 5.080847811862482e-06, "loss": 0.6923, "step": 7968 }, { "epoch": 0.51, "grad_norm": 1.696803601823879, "learning_rate": 5.079811390067049e-06, "loss": 0.6695, "step": 7969 }, { "epoch": 0.51, "grad_norm": 1.5775692431728863, "learning_rate": 5.078774964841491e-06, "loss": 0.6489, "step": 7970 }, { "epoch": 0.51, "grad_norm": 1.4943883227098018, "learning_rate": 5.07773853623035e-06, "loss": 0.6717, "step": 7971 }, { "epoch": 0.51, "grad_norm": 1.6410586608952114, "learning_rate": 5.076702104278168e-06, "loss": 0.6036, "step": 7972 }, { "epoch": 0.51, "grad_norm": 2.168930821847232, "learning_rate": 5.075665669029494e-06, "loss": 0.6984, "step": 7973 }, { "epoch": 0.51, "grad_norm": 1.4392499845561697, "learning_rate": 5.074629230528867e-06, "loss": 0.7195, "step": 7974 }, { "epoch": 0.51, "grad_norm": 1.5501210936876801, "learning_rate": 5.073592788820832e-06, "loss": 0.7609, "step": 7975 }, { "epoch": 0.51, "grad_norm": 1.5845855590129316, "learning_rate": 5.072556343949935e-06, "loss": 0.6799, "step": 7976 }, { "epoch": 0.51, "grad_norm": 1.4624437067703704, "learning_rate": 5.071519895960717e-06, "loss": 0.6565, "step": 7977 }, { "epoch": 0.51, "grad_norm": 1.5380905103487972, "learning_rate": 5.070483444897725e-06, "loss": 0.6029, "step": 7978 }, { "epoch": 0.51, "grad_norm": 1.1637672356652944, "learning_rate": 5.069446990805501e-06, "loss": 0.7078, "step": 7979 }, { "epoch": 0.51, "grad_norm": 1.7073023712190643, "learning_rate": 5.068410533728594e-06, "loss": 0.7018, "step": 7980 }, { "epoch": 0.51, "grad_norm": 1.258644021231484, "learning_rate": 5.067374073711545e-06, "loss": 0.801, "step": 7981 }, { "epoch": 0.51, "grad_norm": 1.60628742077129, "learning_rate": 5.0663376107989e-06, "loss": 0.6356, "step": 7982 }, { "epoch": 0.51, "grad_norm": 1.2353379161442535, "learning_rate": 5.065301145035204e-06, "loss": 0.6104, "step": 7983 }, { "epoch": 0.51, "grad_norm": 1.5147428781978485, "learning_rate": 5.064264676465002e-06, "loss": 0.6803, "step": 7984 }, { "epoch": 0.51, "grad_norm": 1.7464931754564632, "learning_rate": 5.063228205132838e-06, "loss": 0.7198, "step": 7985 }, { "epoch": 0.51, "grad_norm": 1.428287277648902, "learning_rate": 5.06219173108326e-06, "loss": 0.6792, "step": 7986 }, { "epoch": 0.51, "grad_norm": 1.5939587878656993, "learning_rate": 5.061155254360811e-06, "loss": 0.7374, "step": 7987 }, { "epoch": 0.51, "grad_norm": 1.4568848918019013, "learning_rate": 5.060118775010037e-06, "loss": 0.632, "step": 7988 }, { "epoch": 0.51, "grad_norm": 1.602706170242459, "learning_rate": 5.059082293075487e-06, "loss": 0.7203, "step": 7989 }, { "epoch": 0.51, "grad_norm": 1.4542057195287579, "learning_rate": 5.058045808601704e-06, "loss": 0.7156, "step": 7990 }, { "epoch": 0.51, "grad_norm": 1.6179913989747445, "learning_rate": 5.057009321633233e-06, "loss": 0.7308, "step": 7991 }, { "epoch": 0.51, "grad_norm": 1.467756090276258, "learning_rate": 5.0559728322146204e-06, "loss": 0.6432, "step": 7992 }, { "epoch": 0.51, "grad_norm": 1.5352844300176294, "learning_rate": 5.054936340390414e-06, "loss": 0.654, "step": 7993 }, { "epoch": 0.51, "grad_norm": 1.4316070359847557, "learning_rate": 5.0538998462051595e-06, "loss": 0.6208, "step": 7994 }, { "epoch": 0.51, "grad_norm": 1.4971777707165703, "learning_rate": 5.052863349703402e-06, "loss": 0.6874, "step": 7995 }, { "epoch": 0.51, "grad_norm": 1.4118897327117808, "learning_rate": 5.051826850929688e-06, "loss": 0.604, "step": 7996 }, { "epoch": 0.51, "grad_norm": 1.4179032306937716, "learning_rate": 5.050790349928566e-06, "loss": 0.7061, "step": 7997 }, { "epoch": 0.51, "grad_norm": 1.3738640421918993, "learning_rate": 5.049753846744582e-06, "loss": 0.6492, "step": 7998 }, { "epoch": 0.51, "grad_norm": 1.46258762130538, "learning_rate": 5.048717341422282e-06, "loss": 0.7007, "step": 7999 }, { "epoch": 0.51, "grad_norm": 1.5252305191034563, "learning_rate": 5.047680834006212e-06, "loss": 0.7455, "step": 8000 }, { "epoch": 0.51, "grad_norm": 1.1036330334998063, "learning_rate": 5.0466443245409215e-06, "loss": 0.6575, "step": 8001 }, { "epoch": 0.51, "grad_norm": 1.5560763206995119, "learning_rate": 5.045607813070955e-06, "loss": 0.6909, "step": 8002 }, { "epoch": 0.51, "grad_norm": 1.5618872586669765, "learning_rate": 5.04457129964086e-06, "loss": 0.6809, "step": 8003 }, { "epoch": 0.51, "grad_norm": 1.5630927371001904, "learning_rate": 5.043534784295185e-06, "loss": 0.6353, "step": 8004 }, { "epoch": 0.51, "grad_norm": 1.6403507001961768, "learning_rate": 5.042498267078476e-06, "loss": 0.69, "step": 8005 }, { "epoch": 0.51, "grad_norm": 1.6892962647188345, "learning_rate": 5.041461748035281e-06, "loss": 0.6505, "step": 8006 }, { "epoch": 0.51, "grad_norm": 1.7668522284514756, "learning_rate": 5.040425227210148e-06, "loss": 0.8213, "step": 8007 }, { "epoch": 0.51, "grad_norm": 1.5834954421376464, "learning_rate": 5.039388704647622e-06, "loss": 0.707, "step": 8008 }, { "epoch": 0.51, "grad_norm": 1.624432376853667, "learning_rate": 5.038352180392254e-06, "loss": 0.6836, "step": 8009 }, { "epoch": 0.51, "grad_norm": 1.5824837068367568, "learning_rate": 5.037315654488589e-06, "loss": 0.7455, "step": 8010 }, { "epoch": 0.51, "grad_norm": 1.4350239858738527, "learning_rate": 5.036279126981176e-06, "loss": 0.6009, "step": 8011 }, { "epoch": 0.51, "grad_norm": 1.260041838895007, "learning_rate": 5.0352425979145626e-06, "loss": 0.6419, "step": 8012 }, { "epoch": 0.51, "grad_norm": 1.4347378205662071, "learning_rate": 5.034206067333296e-06, "loss": 0.6883, "step": 8013 }, { "epoch": 0.51, "grad_norm": 1.5599773587419603, "learning_rate": 5.0331695352819236e-06, "loss": 0.7246, "step": 8014 }, { "epoch": 0.51, "grad_norm": 1.6320166017243904, "learning_rate": 5.032133001804994e-06, "loss": 0.7738, "step": 8015 }, { "epoch": 0.51, "grad_norm": 1.299050872760153, "learning_rate": 5.031096466947056e-06, "loss": 0.6784, "step": 8016 }, { "epoch": 0.51, "grad_norm": 1.5385988967552868, "learning_rate": 5.030059930752659e-06, "loss": 0.7859, "step": 8017 }, { "epoch": 0.51, "grad_norm": 1.0845136515725622, "learning_rate": 5.029023393266348e-06, "loss": 0.6857, "step": 8018 }, { "epoch": 0.51, "grad_norm": 1.5573124654550368, "learning_rate": 5.027986854532673e-06, "loss": 0.7142, "step": 8019 }, { "epoch": 0.51, "grad_norm": 1.452202248344379, "learning_rate": 5.026950314596181e-06, "loss": 0.6878, "step": 8020 }, { "epoch": 0.51, "grad_norm": 1.8656175333874032, "learning_rate": 5.025913773501421e-06, "loss": 0.6229, "step": 8021 }, { "epoch": 0.51, "grad_norm": 1.5161348832250936, "learning_rate": 5.024877231292941e-06, "loss": 0.6467, "step": 8022 }, { "epoch": 0.51, "grad_norm": 1.6288030207429394, "learning_rate": 5.023840688015291e-06, "loss": 0.7785, "step": 8023 }, { "epoch": 0.51, "grad_norm": 1.4996937806361497, "learning_rate": 5.022804143713018e-06, "loss": 0.73, "step": 8024 }, { "epoch": 0.51, "grad_norm": 1.5442202873955122, "learning_rate": 5.021767598430672e-06, "loss": 0.6743, "step": 8025 }, { "epoch": 0.51, "grad_norm": 1.5890021918088055, "learning_rate": 5.0207310522128e-06, "loss": 0.6223, "step": 8026 }, { "epoch": 0.51, "grad_norm": 1.4871605139581672, "learning_rate": 5.019694505103949e-06, "loss": 0.7032, "step": 8027 }, { "epoch": 0.51, "grad_norm": 1.546904658708698, "learning_rate": 5.018657957148672e-06, "loss": 0.736, "step": 8028 }, { "epoch": 0.51, "grad_norm": 1.1283820877183255, "learning_rate": 5.017621408391513e-06, "loss": 0.5449, "step": 8029 }, { "epoch": 0.51, "grad_norm": 1.0001825144105492, "learning_rate": 5.016584858877024e-06, "loss": 0.6349, "step": 8030 }, { "epoch": 0.51, "grad_norm": 1.6649453458306476, "learning_rate": 5.015548308649752e-06, "loss": 0.7415, "step": 8031 }, { "epoch": 0.51, "grad_norm": 1.4159497904962044, "learning_rate": 5.014511757754248e-06, "loss": 0.6689, "step": 8032 }, { "epoch": 0.51, "grad_norm": 1.7084449720708683, "learning_rate": 5.013475206235056e-06, "loss": 0.7855, "step": 8033 }, { "epoch": 0.51, "grad_norm": 1.6332618576084403, "learning_rate": 5.012438654136731e-06, "loss": 0.6123, "step": 8034 }, { "epoch": 0.51, "grad_norm": 1.593109638353535, "learning_rate": 5.0114021015038185e-06, "loss": 0.6647, "step": 8035 }, { "epoch": 0.51, "grad_norm": 2.0339485419948593, "learning_rate": 5.010365548380865e-06, "loss": 0.64, "step": 8036 }, { "epoch": 0.51, "grad_norm": 1.5009123815124705, "learning_rate": 5.0093289948124244e-06, "loss": 0.6664, "step": 8037 }, { "epoch": 0.51, "grad_norm": 1.3479073640738024, "learning_rate": 5.008292440843043e-06, "loss": 0.6764, "step": 8038 }, { "epoch": 0.51, "grad_norm": 1.4476838412961937, "learning_rate": 5.00725588651727e-06, "loss": 0.6586, "step": 8039 }, { "epoch": 0.51, "grad_norm": 1.5035067256404546, "learning_rate": 5.006219331879655e-06, "loss": 0.7159, "step": 8040 }, { "epoch": 0.51, "grad_norm": 1.3717192878199977, "learning_rate": 5.005182776974745e-06, "loss": 0.6356, "step": 8041 }, { "epoch": 0.51, "grad_norm": 1.2771302672701668, "learning_rate": 5.00414622184709e-06, "loss": 0.672, "step": 8042 }, { "epoch": 0.51, "grad_norm": 1.1245426842026272, "learning_rate": 5.0031096665412385e-06, "loss": 0.6689, "step": 8043 }, { "epoch": 0.51, "grad_norm": 1.4999776339157742, "learning_rate": 5.002073111101741e-06, "loss": 0.6433, "step": 8044 }, { "epoch": 0.51, "grad_norm": 1.411500911512476, "learning_rate": 5.0010365555731455e-06, "loss": 0.6326, "step": 8045 }, { "epoch": 0.51, "grad_norm": 1.3831955189141334, "learning_rate": 5e-06, "loss": 0.69, "step": 8046 }, { "epoch": 0.52, "grad_norm": 1.388596027441784, "learning_rate": 4.998963444426855e-06, "loss": 0.6406, "step": 8047 }, { "epoch": 0.52, "grad_norm": 1.4915198062325359, "learning_rate": 4.997926888898262e-06, "loss": 0.6605, "step": 8048 }, { "epoch": 0.52, "grad_norm": 1.0787945134356793, "learning_rate": 4.996890333458762e-06, "loss": 0.6543, "step": 8049 }, { "epoch": 0.52, "grad_norm": 1.4196668503240584, "learning_rate": 4.995853778152912e-06, "loss": 0.6255, "step": 8050 }, { "epoch": 0.52, "grad_norm": 1.6284483244922456, "learning_rate": 4.994817223025256e-06, "loss": 0.6671, "step": 8051 }, { "epoch": 0.52, "grad_norm": 1.4028032843177973, "learning_rate": 4.993780668120347e-06, "loss": 0.6777, "step": 8052 }, { "epoch": 0.52, "grad_norm": 1.1618057864354068, "learning_rate": 4.992744113482731e-06, "loss": 0.6957, "step": 8053 }, { "epoch": 0.52, "grad_norm": 1.5396220998253491, "learning_rate": 4.9917075591569594e-06, "loss": 0.8485, "step": 8054 }, { "epoch": 0.52, "grad_norm": 1.5532236339009808, "learning_rate": 4.990671005187576e-06, "loss": 0.6852, "step": 8055 }, { "epoch": 0.52, "grad_norm": 1.5217270170883186, "learning_rate": 4.989634451619135e-06, "loss": 0.6248, "step": 8056 }, { "epoch": 0.52, "grad_norm": 1.767945645367558, "learning_rate": 4.988597898496183e-06, "loss": 0.6903, "step": 8057 }, { "epoch": 0.52, "grad_norm": 1.581095323565533, "learning_rate": 4.987561345863269e-06, "loss": 0.6409, "step": 8058 }, { "epoch": 0.52, "grad_norm": 1.5289906299295866, "learning_rate": 4.9865247937649445e-06, "loss": 0.661, "step": 8059 }, { "epoch": 0.52, "grad_norm": 1.7569145530789276, "learning_rate": 4.985488242245753e-06, "loss": 0.6644, "step": 8060 }, { "epoch": 0.52, "grad_norm": 1.2023844073309684, "learning_rate": 4.98445169135025e-06, "loss": 0.6095, "step": 8061 }, { "epoch": 0.52, "grad_norm": 1.8338483342010334, "learning_rate": 4.9834151411229775e-06, "loss": 0.6585, "step": 8062 }, { "epoch": 0.52, "grad_norm": 1.4786357292642425, "learning_rate": 4.982378591608489e-06, "loss": 0.6701, "step": 8063 }, { "epoch": 0.52, "grad_norm": 0.9774314056326663, "learning_rate": 4.98134204285133e-06, "loss": 0.6436, "step": 8064 }, { "epoch": 0.52, "grad_norm": 1.4394063689559793, "learning_rate": 4.980305494896052e-06, "loss": 0.6588, "step": 8065 }, { "epoch": 0.52, "grad_norm": 1.4918372405147644, "learning_rate": 4.979268947787203e-06, "loss": 0.6197, "step": 8066 }, { "epoch": 0.52, "grad_norm": 1.0430479840916669, "learning_rate": 4.978232401569329e-06, "loss": 0.5831, "step": 8067 }, { "epoch": 0.52, "grad_norm": 1.3001455955154506, "learning_rate": 4.977195856286983e-06, "loss": 0.6613, "step": 8068 }, { "epoch": 0.52, "grad_norm": 1.460994565405388, "learning_rate": 4.97615931198471e-06, "loss": 0.6351, "step": 8069 }, { "epoch": 0.52, "grad_norm": 1.5871613112365963, "learning_rate": 4.975122768707061e-06, "loss": 0.7615, "step": 8070 }, { "epoch": 0.52, "grad_norm": 1.546164418922822, "learning_rate": 4.974086226498581e-06, "loss": 0.7019, "step": 8071 }, { "epoch": 0.52, "grad_norm": 1.6067634342170816, "learning_rate": 4.9730496854038215e-06, "loss": 0.5546, "step": 8072 }, { "epoch": 0.52, "grad_norm": 1.4472501402079385, "learning_rate": 4.972013145467329e-06, "loss": 0.6464, "step": 8073 }, { "epoch": 0.52, "grad_norm": 1.4306584410423797, "learning_rate": 4.970976606733653e-06, "loss": 0.7231, "step": 8074 }, { "epoch": 0.52, "grad_norm": 1.4866041721685337, "learning_rate": 4.969940069247343e-06, "loss": 0.6532, "step": 8075 }, { "epoch": 0.52, "grad_norm": 1.4854159719569804, "learning_rate": 4.968903533052944e-06, "loss": 0.7306, "step": 8076 }, { "epoch": 0.52, "grad_norm": 1.5110595749770186, "learning_rate": 4.9678669981950076e-06, "loss": 0.7046, "step": 8077 }, { "epoch": 0.52, "grad_norm": 1.0794856306021765, "learning_rate": 4.966830464718078e-06, "loss": 0.6101, "step": 8078 }, { "epoch": 0.52, "grad_norm": 1.3074760315292662, "learning_rate": 4.965793932666707e-06, "loss": 0.6742, "step": 8079 }, { "epoch": 0.52, "grad_norm": 1.3203455670618598, "learning_rate": 4.964757402085439e-06, "loss": 0.6177, "step": 8080 }, { "epoch": 0.52, "grad_norm": 1.487340216553944, "learning_rate": 4.963720873018826e-06, "loss": 0.6185, "step": 8081 }, { "epoch": 0.52, "grad_norm": 1.589559096091257, "learning_rate": 4.9626843455114125e-06, "loss": 0.6858, "step": 8082 }, { "epoch": 0.52, "grad_norm": 1.4344111129047346, "learning_rate": 4.961647819607749e-06, "loss": 0.6396, "step": 8083 }, { "epoch": 0.52, "grad_norm": 1.503799609823922, "learning_rate": 4.96061129535238e-06, "loss": 0.71, "step": 8084 }, { "epoch": 0.52, "grad_norm": 1.0530475089925084, "learning_rate": 4.959574772789853e-06, "loss": 0.7485, "step": 8085 }, { "epoch": 0.52, "grad_norm": 1.7713742688895966, "learning_rate": 4.95853825196472e-06, "loss": 0.6641, "step": 8086 }, { "epoch": 0.52, "grad_norm": 1.7356148626958565, "learning_rate": 4.957501732921524e-06, "loss": 0.7276, "step": 8087 }, { "epoch": 0.52, "grad_norm": 1.5884427556070788, "learning_rate": 4.9564652157048166e-06, "loss": 0.6958, "step": 8088 }, { "epoch": 0.52, "grad_norm": 1.8934874710914507, "learning_rate": 4.955428700359141e-06, "loss": 0.666, "step": 8089 }, { "epoch": 0.52, "grad_norm": 1.0866464117259897, "learning_rate": 4.9543921869290475e-06, "loss": 0.6621, "step": 8090 }, { "epoch": 0.52, "grad_norm": 1.4491864842215512, "learning_rate": 4.953355675459081e-06, "loss": 0.6943, "step": 8091 }, { "epoch": 0.52, "grad_norm": 1.646362134903072, "learning_rate": 4.952319165993789e-06, "loss": 0.7456, "step": 8092 }, { "epoch": 0.52, "grad_norm": 1.7788796897558836, "learning_rate": 4.951282658577719e-06, "loss": 0.8153, "step": 8093 }, { "epoch": 0.52, "grad_norm": 1.7204910441467043, "learning_rate": 4.950246153255418e-06, "loss": 0.7545, "step": 8094 }, { "epoch": 0.52, "grad_norm": 1.5545935080573092, "learning_rate": 4.9492096500714346e-06, "loss": 0.6368, "step": 8095 }, { "epoch": 0.52, "grad_norm": 1.2219435090669426, "learning_rate": 4.9481731490703124e-06, "loss": 0.687, "step": 8096 }, { "epoch": 0.52, "grad_norm": 1.4311078180084507, "learning_rate": 4.947136650296601e-06, "loss": 0.6847, "step": 8097 }, { "epoch": 0.52, "grad_norm": 1.6578297149542847, "learning_rate": 4.946100153794843e-06, "loss": 0.7161, "step": 8098 }, { "epoch": 0.52, "grad_norm": 1.6836527808890778, "learning_rate": 4.945063659609588e-06, "loss": 0.6162, "step": 8099 }, { "epoch": 0.52, "grad_norm": 1.5779275106965156, "learning_rate": 4.94402716778538e-06, "loss": 0.6739, "step": 8100 }, { "epoch": 0.52, "grad_norm": 1.2246486200133755, "learning_rate": 4.94299067836677e-06, "loss": 0.6506, "step": 8101 }, { "epoch": 0.52, "grad_norm": 1.813673372076217, "learning_rate": 4.9419541913982984e-06, "loss": 0.8365, "step": 8102 }, { "epoch": 0.52, "grad_norm": 1.5206709857079979, "learning_rate": 4.940917706924513e-06, "loss": 0.73, "step": 8103 }, { "epoch": 0.52, "grad_norm": 1.5786347213758678, "learning_rate": 4.939881224989963e-06, "loss": 0.6874, "step": 8104 }, { "epoch": 0.52, "grad_norm": 1.2232282370906598, "learning_rate": 4.93884474563919e-06, "loss": 0.6347, "step": 8105 }, { "epoch": 0.52, "grad_norm": 1.672239893777867, "learning_rate": 4.9378082689167415e-06, "loss": 0.6873, "step": 8106 }, { "epoch": 0.52, "grad_norm": 1.6026252593518142, "learning_rate": 4.936771794867163e-06, "loss": 0.6429, "step": 8107 }, { "epoch": 0.52, "grad_norm": 1.4700355738268251, "learning_rate": 4.935735323535e-06, "loss": 0.6978, "step": 8108 }, { "epoch": 0.52, "grad_norm": 1.5474197018620084, "learning_rate": 4.9346988549647974e-06, "loss": 0.6424, "step": 8109 }, { "epoch": 0.52, "grad_norm": 1.6985742167998465, "learning_rate": 4.933662389201102e-06, "loss": 0.6952, "step": 8110 }, { "epoch": 0.52, "grad_norm": 1.5365796349853225, "learning_rate": 4.9326259262884565e-06, "loss": 0.6412, "step": 8111 }, { "epoch": 0.52, "grad_norm": 1.5178421682596848, "learning_rate": 4.931589466271406e-06, "loss": 0.6165, "step": 8112 }, { "epoch": 0.52, "grad_norm": 1.2124822259386945, "learning_rate": 4.9305530091945e-06, "loss": 0.6215, "step": 8113 }, { "epoch": 0.52, "grad_norm": 1.8479769204615764, "learning_rate": 4.929516555102277e-06, "loss": 0.7242, "step": 8114 }, { "epoch": 0.52, "grad_norm": 1.4153539434387683, "learning_rate": 4.928480104039285e-06, "loss": 0.662, "step": 8115 }, { "epoch": 0.52, "grad_norm": 1.5852027542362888, "learning_rate": 4.927443656050067e-06, "loss": 0.6579, "step": 8116 }, { "epoch": 0.52, "grad_norm": 1.7410548233392547, "learning_rate": 4.92640721117917e-06, "loss": 0.7043, "step": 8117 }, { "epoch": 0.52, "grad_norm": 1.579793354689458, "learning_rate": 4.925370769471135e-06, "loss": 0.6892, "step": 8118 }, { "epoch": 0.52, "grad_norm": 1.4327308827997471, "learning_rate": 4.924334330970509e-06, "loss": 0.6655, "step": 8119 }, { "epoch": 0.52, "grad_norm": 1.5014931697938578, "learning_rate": 4.923297895721833e-06, "loss": 0.5801, "step": 8120 }, { "epoch": 0.52, "grad_norm": 1.0721467625014693, "learning_rate": 4.922261463769652e-06, "loss": 0.6427, "step": 8121 }, { "epoch": 0.52, "grad_norm": 1.762996367961235, "learning_rate": 4.92122503515851e-06, "loss": 0.7332, "step": 8122 }, { "epoch": 0.52, "grad_norm": 1.3769752336924328, "learning_rate": 4.920188609932951e-06, "loss": 0.6143, "step": 8123 }, { "epoch": 0.52, "grad_norm": 1.438505529781881, "learning_rate": 4.919152188137519e-06, "loss": 0.6341, "step": 8124 }, { "epoch": 0.52, "grad_norm": 1.5735810245140296, "learning_rate": 4.918115769816754e-06, "loss": 0.6196, "step": 8125 }, { "epoch": 0.52, "grad_norm": 1.4009316378582688, "learning_rate": 4.917079355015204e-06, "loss": 0.668, "step": 8126 }, { "epoch": 0.52, "grad_norm": 1.5825972570028446, "learning_rate": 4.9160429437774065e-06, "loss": 0.6771, "step": 8127 }, { "epoch": 0.52, "grad_norm": 1.7246582761366256, "learning_rate": 4.915006536147908e-06, "loss": 0.6949, "step": 8128 }, { "epoch": 0.52, "grad_norm": 1.5411745704050228, "learning_rate": 4.913970132171248e-06, "loss": 0.7016, "step": 8129 }, { "epoch": 0.52, "grad_norm": 1.0904064222898346, "learning_rate": 4.912933731891972e-06, "loss": 0.6677, "step": 8130 }, { "epoch": 0.52, "grad_norm": 1.7122363323860117, "learning_rate": 4.911897335354624e-06, "loss": 0.7438, "step": 8131 }, { "epoch": 0.52, "grad_norm": 1.3307865637492167, "learning_rate": 4.91086094260374e-06, "loss": 0.5986, "step": 8132 }, { "epoch": 0.52, "grad_norm": 1.3312427795044053, "learning_rate": 4.909824553683868e-06, "loss": 0.6603, "step": 8133 }, { "epoch": 0.52, "grad_norm": 1.0594885699128491, "learning_rate": 4.908788168639545e-06, "loss": 0.8426, "step": 8134 }, { "epoch": 0.52, "grad_norm": 1.344407540132714, "learning_rate": 4.907751787515316e-06, "loss": 0.6488, "step": 8135 }, { "epoch": 0.52, "grad_norm": 1.5300522394016507, "learning_rate": 4.90671541035572e-06, "loss": 0.711, "step": 8136 }, { "epoch": 0.52, "grad_norm": 1.5368040376401755, "learning_rate": 4.905679037205302e-06, "loss": 0.6685, "step": 8137 }, { "epoch": 0.52, "grad_norm": 0.9511291747078531, "learning_rate": 4.904642668108599e-06, "loss": 0.5396, "step": 8138 }, { "epoch": 0.52, "grad_norm": 1.4653044506022908, "learning_rate": 4.903606303110153e-06, "loss": 0.609, "step": 8139 }, { "epoch": 0.52, "grad_norm": 1.4068031262100236, "learning_rate": 4.902569942254508e-06, "loss": 0.6854, "step": 8140 }, { "epoch": 0.52, "grad_norm": 1.5493459438463406, "learning_rate": 4.9015335855862e-06, "loss": 0.706, "step": 8141 }, { "epoch": 0.52, "grad_norm": 1.532693977491143, "learning_rate": 4.900497233149773e-06, "loss": 0.6923, "step": 8142 }, { "epoch": 0.52, "grad_norm": 1.578137409753625, "learning_rate": 4.899460884989764e-06, "loss": 0.6929, "step": 8143 }, { "epoch": 0.52, "grad_norm": 1.5015987621524889, "learning_rate": 4.898424541150717e-06, "loss": 0.7149, "step": 8144 }, { "epoch": 0.52, "grad_norm": 1.956064257454797, "learning_rate": 4.897388201677169e-06, "loss": 0.803, "step": 8145 }, { "epoch": 0.52, "grad_norm": 2.1650628917748325, "learning_rate": 4.896351866613661e-06, "loss": 0.6147, "step": 8146 }, { "epoch": 0.52, "grad_norm": 1.4285125111720278, "learning_rate": 4.895315536004731e-06, "loss": 0.6922, "step": 8147 }, { "epoch": 0.52, "grad_norm": 1.5318856601263213, "learning_rate": 4.8942792098949184e-06, "loss": 0.6853, "step": 8148 }, { "epoch": 0.52, "grad_norm": 1.5682726573317605, "learning_rate": 4.893242888328765e-06, "loss": 0.6488, "step": 8149 }, { "epoch": 0.52, "grad_norm": 1.6325109838014051, "learning_rate": 4.892206571350808e-06, "loss": 0.6374, "step": 8150 }, { "epoch": 0.52, "grad_norm": 1.6618737232593934, "learning_rate": 4.891170259005585e-06, "loss": 0.6382, "step": 8151 }, { "epoch": 0.52, "grad_norm": 1.1070221611814908, "learning_rate": 4.890133951337636e-06, "loss": 0.7442, "step": 8152 }, { "epoch": 0.52, "grad_norm": 1.0454847193794983, "learning_rate": 4.8890976483915e-06, "loss": 0.8307, "step": 8153 }, { "epoch": 0.52, "grad_norm": 1.0234834959829069, "learning_rate": 4.888061350211712e-06, "loss": 0.5991, "step": 8154 }, { "epoch": 0.52, "grad_norm": 1.606009778519431, "learning_rate": 4.887025056842815e-06, "loss": 0.7076, "step": 8155 }, { "epoch": 0.52, "grad_norm": 1.1370960031681838, "learning_rate": 4.88598876832934e-06, "loss": 0.6839, "step": 8156 }, { "epoch": 0.52, "grad_norm": 1.4446031099329268, "learning_rate": 4.88495248471583e-06, "loss": 0.693, "step": 8157 }, { "epoch": 0.52, "grad_norm": 1.529455923514455, "learning_rate": 4.883916206046819e-06, "loss": 0.6465, "step": 8158 }, { "epoch": 0.52, "grad_norm": 1.5161884056914263, "learning_rate": 4.8828799323668465e-06, "loss": 0.6557, "step": 8159 }, { "epoch": 0.52, "grad_norm": 1.5033678726867077, "learning_rate": 4.88184366372045e-06, "loss": 0.617, "step": 8160 }, { "epoch": 0.52, "grad_norm": 1.345721805494842, "learning_rate": 4.880807400152162e-06, "loss": 0.6339, "step": 8161 }, { "epoch": 0.52, "grad_norm": 1.286501315575836, "learning_rate": 4.879771141706524e-06, "loss": 0.7257, "step": 8162 }, { "epoch": 0.52, "grad_norm": 1.3762605226106768, "learning_rate": 4.878734888428068e-06, "loss": 0.6035, "step": 8163 }, { "epoch": 0.52, "grad_norm": 1.527645370249277, "learning_rate": 4.8776986403613324e-06, "loss": 0.5743, "step": 8164 }, { "epoch": 0.52, "grad_norm": 1.1042356455186004, "learning_rate": 4.876662397550851e-06, "loss": 0.64, "step": 8165 }, { "epoch": 0.52, "grad_norm": 1.512575367659316, "learning_rate": 4.875626160041163e-06, "loss": 0.7798, "step": 8166 }, { "epoch": 0.52, "grad_norm": 1.6076628224665979, "learning_rate": 4.874589927876799e-06, "loss": 0.7128, "step": 8167 }, { "epoch": 0.52, "grad_norm": 1.469051198376628, "learning_rate": 4.873553701102297e-06, "loss": 0.72, "step": 8168 }, { "epoch": 0.52, "grad_norm": 1.738723352743454, "learning_rate": 4.872517479762192e-06, "loss": 0.7059, "step": 8169 }, { "epoch": 0.52, "grad_norm": 1.55988937831251, "learning_rate": 4.871481263901017e-06, "loss": 0.7463, "step": 8170 }, { "epoch": 0.52, "grad_norm": 1.481133146986501, "learning_rate": 4.870445053563307e-06, "loss": 0.6335, "step": 8171 }, { "epoch": 0.52, "grad_norm": 1.4751240879523109, "learning_rate": 4.8694088487935964e-06, "loss": 0.6865, "step": 8172 }, { "epoch": 0.52, "grad_norm": 1.4708274153843406, "learning_rate": 4.868372649636421e-06, "loss": 0.6652, "step": 8173 }, { "epoch": 0.52, "grad_norm": 1.602483590118282, "learning_rate": 4.86733645613631e-06, "loss": 0.6936, "step": 8174 }, { "epoch": 0.52, "grad_norm": 1.4793076019542943, "learning_rate": 4.866300268337802e-06, "loss": 0.6852, "step": 8175 }, { "epoch": 0.52, "grad_norm": 1.42272293623097, "learning_rate": 4.865264086285425e-06, "loss": 0.7058, "step": 8176 }, { "epoch": 0.52, "grad_norm": 0.9907197665654008, "learning_rate": 4.864227910023714e-06, "loss": 0.4863, "step": 8177 }, { "epoch": 0.52, "grad_norm": 1.5042396936643405, "learning_rate": 4.863191739597205e-06, "loss": 0.6494, "step": 8178 }, { "epoch": 0.52, "grad_norm": 1.4906741711111862, "learning_rate": 4.862155575050426e-06, "loss": 0.6155, "step": 8179 }, { "epoch": 0.52, "grad_norm": 1.4868987035808938, "learning_rate": 4.861119416427911e-06, "loss": 0.6222, "step": 8180 }, { "epoch": 0.52, "grad_norm": 1.5026765965297129, "learning_rate": 4.86008326377419e-06, "loss": 0.7427, "step": 8181 }, { "epoch": 0.52, "grad_norm": 1.5849451037770317, "learning_rate": 4.859047117133799e-06, "loss": 0.7825, "step": 8182 }, { "epoch": 0.52, "grad_norm": 1.3815403030884992, "learning_rate": 4.858010976551265e-06, "loss": 0.6519, "step": 8183 }, { "epoch": 0.52, "grad_norm": 1.522938117267335, "learning_rate": 4.856974842071122e-06, "loss": 0.6466, "step": 8184 }, { "epoch": 0.52, "grad_norm": 1.57536628316331, "learning_rate": 4.855938713737899e-06, "loss": 0.6222, "step": 8185 }, { "epoch": 0.52, "grad_norm": 1.5536771306870802, "learning_rate": 4.854902591596127e-06, "loss": 0.6209, "step": 8186 }, { "epoch": 0.52, "grad_norm": 1.4932927892557954, "learning_rate": 4.853866475690338e-06, "loss": 0.6695, "step": 8187 }, { "epoch": 0.52, "grad_norm": 1.4503701105296845, "learning_rate": 4.852830366065059e-06, "loss": 0.7241, "step": 8188 }, { "epoch": 0.52, "grad_norm": 1.4662220046309318, "learning_rate": 4.851794262764824e-06, "loss": 0.6871, "step": 8189 }, { "epoch": 0.52, "grad_norm": 1.6946761166926334, "learning_rate": 4.850758165834157e-06, "loss": 0.7645, "step": 8190 }, { "epoch": 0.52, "grad_norm": 1.529585399420914, "learning_rate": 4.849722075317594e-06, "loss": 0.648, "step": 8191 }, { "epoch": 0.52, "grad_norm": 1.5569159901678524, "learning_rate": 4.8486859912596575e-06, "loss": 0.6887, "step": 8192 }, { "epoch": 0.52, "grad_norm": 1.4547514238880446, "learning_rate": 4.847649913704881e-06, "loss": 0.6528, "step": 8193 }, { "epoch": 0.52, "grad_norm": 1.492878455590815, "learning_rate": 4.8466138426977895e-06, "loss": 0.6861, "step": 8194 }, { "epoch": 0.52, "grad_norm": 1.4569868817969407, "learning_rate": 4.845577778282913e-06, "loss": 0.734, "step": 8195 }, { "epoch": 0.52, "grad_norm": 1.6854181695998802, "learning_rate": 4.8445417205047805e-06, "loss": 0.6897, "step": 8196 }, { "epoch": 0.52, "grad_norm": 1.5276459443708152, "learning_rate": 4.843505669407917e-06, "loss": 0.6357, "step": 8197 }, { "epoch": 0.52, "grad_norm": 1.5025668443557876, "learning_rate": 4.842469625036853e-06, "loss": 0.6193, "step": 8198 }, { "epoch": 0.52, "grad_norm": 1.5428100304813224, "learning_rate": 4.841433587436111e-06, "loss": 0.5317, "step": 8199 }, { "epoch": 0.52, "grad_norm": 1.4700267047387907, "learning_rate": 4.840397556650222e-06, "loss": 0.6501, "step": 8200 }, { "epoch": 0.52, "grad_norm": 1.5776475316655865, "learning_rate": 4.839361532723708e-06, "loss": 0.7072, "step": 8201 }, { "epoch": 0.52, "grad_norm": 1.5180097197749436, "learning_rate": 4.838325515701101e-06, "loss": 0.6707, "step": 8202 }, { "epoch": 0.53, "grad_norm": 1.386368937007519, "learning_rate": 4.837289505626921e-06, "loss": 0.575, "step": 8203 }, { "epoch": 0.53, "grad_norm": 1.3100471981422308, "learning_rate": 4.836253502545697e-06, "loss": 0.6721, "step": 8204 }, { "epoch": 0.53, "grad_norm": 2.233713208363838, "learning_rate": 4.835217506501955e-06, "loss": 0.731, "step": 8205 }, { "epoch": 0.53, "grad_norm": 1.4288166468251546, "learning_rate": 4.834181517540216e-06, "loss": 0.637, "step": 8206 }, { "epoch": 0.53, "grad_norm": 1.452535416229041, "learning_rate": 4.8331455357050084e-06, "loss": 0.638, "step": 8207 }, { "epoch": 0.53, "grad_norm": 1.0375045939130803, "learning_rate": 4.832109561040853e-06, "loss": 0.6006, "step": 8208 }, { "epoch": 0.53, "grad_norm": 1.7001824349440666, "learning_rate": 4.831073593592279e-06, "loss": 0.681, "step": 8209 }, { "epoch": 0.53, "grad_norm": 1.5238613411792812, "learning_rate": 4.830037633403805e-06, "loss": 0.6826, "step": 8210 }, { "epoch": 0.53, "grad_norm": 1.4112230671681913, "learning_rate": 4.829001680519958e-06, "loss": 0.6758, "step": 8211 }, { "epoch": 0.53, "grad_norm": 1.6207215839315912, "learning_rate": 4.8279657349852575e-06, "loss": 0.6532, "step": 8212 }, { "epoch": 0.53, "grad_norm": 1.4689175105878642, "learning_rate": 4.826929796844228e-06, "loss": 0.6416, "step": 8213 }, { "epoch": 0.53, "grad_norm": 1.6446192530157988, "learning_rate": 4.825893866141394e-06, "loss": 0.7139, "step": 8214 }, { "epoch": 0.53, "grad_norm": 1.515374733548182, "learning_rate": 4.824857942921274e-06, "loss": 0.7784, "step": 8215 }, { "epoch": 0.53, "grad_norm": 1.5825784304109471, "learning_rate": 4.823822027228394e-06, "loss": 0.6428, "step": 8216 }, { "epoch": 0.53, "grad_norm": 1.4568547237588887, "learning_rate": 4.822786119107271e-06, "loss": 0.8193, "step": 8217 }, { "epoch": 0.53, "grad_norm": 1.6885191036479226, "learning_rate": 4.82175021860243e-06, "loss": 0.7235, "step": 8218 }, { "epoch": 0.53, "grad_norm": 1.577105748269175, "learning_rate": 4.820714325758388e-06, "loss": 0.6511, "step": 8219 }, { "epoch": 0.53, "grad_norm": 1.3747045869239247, "learning_rate": 4.8196784406196685e-06, "loss": 0.7006, "step": 8220 }, { "epoch": 0.53, "grad_norm": 1.6049584974815327, "learning_rate": 4.818642563230791e-06, "loss": 0.7959, "step": 8221 }, { "epoch": 0.53, "grad_norm": 1.7586292974499902, "learning_rate": 4.817606693636273e-06, "loss": 0.6558, "step": 8222 }, { "epoch": 0.53, "grad_norm": 1.5114584604132526, "learning_rate": 4.8165708318806385e-06, "loss": 0.635, "step": 8223 }, { "epoch": 0.53, "grad_norm": 1.7486606717763438, "learning_rate": 4.815534978008403e-06, "loss": 0.6814, "step": 8224 }, { "epoch": 0.53, "grad_norm": 1.6607180839948092, "learning_rate": 4.814499132064088e-06, "loss": 0.6451, "step": 8225 }, { "epoch": 0.53, "grad_norm": 1.5435929087646243, "learning_rate": 4.813463294092209e-06, "loss": 0.6313, "step": 8226 }, { "epoch": 0.53, "grad_norm": 1.6377223425669314, "learning_rate": 4.8124274641372875e-06, "loss": 0.7298, "step": 8227 }, { "epoch": 0.53, "grad_norm": 1.7063784796588493, "learning_rate": 4.811391642243837e-06, "loss": 0.7678, "step": 8228 }, { "epoch": 0.53, "grad_norm": 1.6097095179657988, "learning_rate": 4.810355828456379e-06, "loss": 0.6736, "step": 8229 }, { "epoch": 0.53, "grad_norm": 1.4185914847576195, "learning_rate": 4.809320022819427e-06, "loss": 0.7095, "step": 8230 }, { "epoch": 0.53, "grad_norm": 1.4828553685486483, "learning_rate": 4.808284225377503e-06, "loss": 0.6147, "step": 8231 }, { "epoch": 0.53, "grad_norm": 1.3327387921760512, "learning_rate": 4.8072484361751176e-06, "loss": 0.755, "step": 8232 }, { "epoch": 0.53, "grad_norm": 1.6498469026831561, "learning_rate": 4.8062126552567896e-06, "loss": 0.6435, "step": 8233 }, { "epoch": 0.53, "grad_norm": 1.0738969936146456, "learning_rate": 4.805176882667036e-06, "loss": 0.7104, "step": 8234 }, { "epoch": 0.53, "grad_norm": 1.580296056947703, "learning_rate": 4.804141118450369e-06, "loss": 0.6935, "step": 8235 }, { "epoch": 0.53, "grad_norm": 1.4037078734220376, "learning_rate": 4.803105362651306e-06, "loss": 0.6396, "step": 8236 }, { "epoch": 0.53, "grad_norm": 1.9693705569551483, "learning_rate": 4.80206961531436e-06, "loss": 0.6371, "step": 8237 }, { "epoch": 0.53, "grad_norm": 1.650169326229627, "learning_rate": 4.801033876484048e-06, "loss": 0.7378, "step": 8238 }, { "epoch": 0.53, "grad_norm": 1.7040860424629727, "learning_rate": 4.799998146204879e-06, "loss": 0.6324, "step": 8239 }, { "epoch": 0.53, "grad_norm": 1.4202283963248536, "learning_rate": 4.798962424521373e-06, "loss": 0.5886, "step": 8240 }, { "epoch": 0.53, "grad_norm": 1.4766966826098638, "learning_rate": 4.797926711478037e-06, "loss": 0.6558, "step": 8241 }, { "epoch": 0.53, "grad_norm": 1.1048983258323246, "learning_rate": 4.796891007119385e-06, "loss": 0.6939, "step": 8242 }, { "epoch": 0.53, "grad_norm": 1.6104651537462895, "learning_rate": 4.7958553114899335e-06, "loss": 0.6607, "step": 8243 }, { "epoch": 0.53, "grad_norm": 1.459267478539909, "learning_rate": 4.794819624634189e-06, "loss": 0.6743, "step": 8244 }, { "epoch": 0.53, "grad_norm": 1.417274042245907, "learning_rate": 4.793783946596669e-06, "loss": 0.7063, "step": 8245 }, { "epoch": 0.53, "grad_norm": 1.5608860719800892, "learning_rate": 4.79274827742188e-06, "loss": 0.7216, "step": 8246 }, { "epoch": 0.53, "grad_norm": 1.6195133020625694, "learning_rate": 4.791712617154336e-06, "loss": 0.6261, "step": 8247 }, { "epoch": 0.53, "grad_norm": 1.066986766488626, "learning_rate": 4.790676965838545e-06, "loss": 0.6465, "step": 8248 }, { "epoch": 0.53, "grad_norm": 2.5384546454707704, "learning_rate": 4.789641323519018e-06, "loss": 0.619, "step": 8249 }, { "epoch": 0.53, "grad_norm": 1.470489857344088, "learning_rate": 4.788605690240264e-06, "loss": 0.65, "step": 8250 }, { "epoch": 0.53, "grad_norm": 1.739973736988899, "learning_rate": 4.787570066046794e-06, "loss": 0.612, "step": 8251 }, { "epoch": 0.53, "grad_norm": 1.43419351918509, "learning_rate": 4.7865344509831165e-06, "loss": 0.6685, "step": 8252 }, { "epoch": 0.53, "grad_norm": 1.5644684657665695, "learning_rate": 4.785498845093739e-06, "loss": 0.6781, "step": 8253 }, { "epoch": 0.53, "grad_norm": 1.7049623319782676, "learning_rate": 4.784463248423172e-06, "loss": 0.7014, "step": 8254 }, { "epoch": 0.53, "grad_norm": 1.5048552970947804, "learning_rate": 4.78342766101592e-06, "loss": 0.6337, "step": 8255 }, { "epoch": 0.53, "grad_norm": 0.9927035396726098, "learning_rate": 4.782392082916493e-06, "loss": 0.6035, "step": 8256 }, { "epoch": 0.53, "grad_norm": 1.7554381584499215, "learning_rate": 4.7813565141693965e-06, "loss": 0.6043, "step": 8257 }, { "epoch": 0.53, "grad_norm": 2.0136048333871464, "learning_rate": 4.7803209548191395e-06, "loss": 0.6578, "step": 8258 }, { "epoch": 0.53, "grad_norm": 1.4193217130128892, "learning_rate": 4.7792854049102234e-06, "loss": 0.6492, "step": 8259 }, { "epoch": 0.53, "grad_norm": 1.3761482904520623, "learning_rate": 4.778249864487157e-06, "loss": 0.7133, "step": 8260 }, { "epoch": 0.53, "grad_norm": 1.1804621158491473, "learning_rate": 4.7772143335944486e-06, "loss": 0.6821, "step": 8261 }, { "epoch": 0.53, "grad_norm": 1.5634844001762893, "learning_rate": 4.776178812276596e-06, "loss": 0.6959, "step": 8262 }, { "epoch": 0.53, "grad_norm": 1.0387261997569084, "learning_rate": 4.775143300578112e-06, "loss": 0.6918, "step": 8263 }, { "epoch": 0.53, "grad_norm": 1.637192066148503, "learning_rate": 4.774107798543492e-06, "loss": 0.6508, "step": 8264 }, { "epoch": 0.53, "grad_norm": 1.688161899616236, "learning_rate": 4.773072306217247e-06, "loss": 0.6935, "step": 8265 }, { "epoch": 0.53, "grad_norm": 1.1432372753676634, "learning_rate": 4.772036823643876e-06, "loss": 0.6253, "step": 8266 }, { "epoch": 0.53, "grad_norm": 1.7392062100255352, "learning_rate": 4.771001350867886e-06, "loss": 0.812, "step": 8267 }, { "epoch": 0.53, "grad_norm": 1.5670964125806524, "learning_rate": 4.769965887933774e-06, "loss": 0.7022, "step": 8268 }, { "epoch": 0.53, "grad_norm": 1.3559678458547832, "learning_rate": 4.768930434886044e-06, "loss": 0.6254, "step": 8269 }, { "epoch": 0.53, "grad_norm": 3.8278702112821743, "learning_rate": 4.7678949917692015e-06, "loss": 0.7057, "step": 8270 }, { "epoch": 0.53, "grad_norm": 1.5807129609849095, "learning_rate": 4.766859558627741e-06, "loss": 0.6151, "step": 8271 }, { "epoch": 0.53, "grad_norm": 1.8320283183657284, "learning_rate": 4.765824135506169e-06, "loss": 0.6759, "step": 8272 }, { "epoch": 0.53, "grad_norm": 1.614419767651414, "learning_rate": 4.7647887224489834e-06, "loss": 0.6955, "step": 8273 }, { "epoch": 0.53, "grad_norm": 1.3758443352113967, "learning_rate": 4.763753319500685e-06, "loss": 0.697, "step": 8274 }, { "epoch": 0.53, "grad_norm": 2.013065344230374, "learning_rate": 4.762717926705771e-06, "loss": 0.6679, "step": 8275 }, { "epoch": 0.53, "grad_norm": 1.332831187407921, "learning_rate": 4.761682544108744e-06, "loss": 0.751, "step": 8276 }, { "epoch": 0.53, "grad_norm": 1.5880440475236641, "learning_rate": 4.760647171754098e-06, "loss": 0.7484, "step": 8277 }, { "epoch": 0.53, "grad_norm": 1.4331819324478812, "learning_rate": 4.759611809686334e-06, "loss": 0.6018, "step": 8278 }, { "epoch": 0.53, "grad_norm": 1.504465678352365, "learning_rate": 4.75857645794995e-06, "loss": 0.6149, "step": 8279 }, { "epoch": 0.53, "grad_norm": 1.4067375177751869, "learning_rate": 4.757541116589441e-06, "loss": 0.6136, "step": 8280 }, { "epoch": 0.53, "grad_norm": 1.7889664755743748, "learning_rate": 4.756505785649309e-06, "loss": 0.6969, "step": 8281 }, { "epoch": 0.53, "grad_norm": 1.6100242077427225, "learning_rate": 4.755470465174044e-06, "loss": 0.7849, "step": 8282 }, { "epoch": 0.53, "grad_norm": 1.4780314422767475, "learning_rate": 4.754435155208146e-06, "loss": 0.6423, "step": 8283 }, { "epoch": 0.53, "grad_norm": 1.0788878965635273, "learning_rate": 4.753399855796107e-06, "loss": 0.6786, "step": 8284 }, { "epoch": 0.53, "grad_norm": 1.569741109970484, "learning_rate": 4.752364566982425e-06, "loss": 0.6904, "step": 8285 }, { "epoch": 0.53, "grad_norm": 1.4838032300301833, "learning_rate": 4.751329288811593e-06, "loss": 0.7441, "step": 8286 }, { "epoch": 0.53, "grad_norm": 1.5108606941857787, "learning_rate": 4.750294021328105e-06, "loss": 0.7044, "step": 8287 }, { "epoch": 0.53, "grad_norm": 1.5344842203910862, "learning_rate": 4.749258764576458e-06, "loss": 0.7749, "step": 8288 }, { "epoch": 0.53, "grad_norm": 1.7254389346889871, "learning_rate": 4.748223518601141e-06, "loss": 0.6736, "step": 8289 }, { "epoch": 0.53, "grad_norm": 1.8493155358674978, "learning_rate": 4.747188283446649e-06, "loss": 0.7143, "step": 8290 }, { "epoch": 0.53, "grad_norm": 1.6942340428012295, "learning_rate": 4.7461530591574715e-06, "loss": 0.6638, "step": 8291 }, { "epoch": 0.53, "grad_norm": 1.5976110058053947, "learning_rate": 4.745117845778103e-06, "loss": 0.7572, "step": 8292 }, { "epoch": 0.53, "grad_norm": 1.3945511546475007, "learning_rate": 4.744082643353034e-06, "loss": 0.6766, "step": 8293 }, { "epoch": 0.53, "grad_norm": 1.5296376677471788, "learning_rate": 4.743047451926756e-06, "loss": 0.6821, "step": 8294 }, { "epoch": 0.53, "grad_norm": 1.5798590426638934, "learning_rate": 4.742012271543757e-06, "loss": 0.6705, "step": 8295 }, { "epoch": 0.53, "grad_norm": 1.4962532217464533, "learning_rate": 4.740977102248528e-06, "loss": 0.6263, "step": 8296 }, { "epoch": 0.53, "grad_norm": 1.6488198391310953, "learning_rate": 4.739941944085561e-06, "loss": 0.6709, "step": 8297 }, { "epoch": 0.53, "grad_norm": 1.5738383723536413, "learning_rate": 4.738906797099342e-06, "loss": 0.6905, "step": 8298 }, { "epoch": 0.53, "grad_norm": 1.086744575213431, "learning_rate": 4.73787166133436e-06, "loss": 0.6178, "step": 8299 }, { "epoch": 0.53, "grad_norm": 1.3986162379006877, "learning_rate": 4.736836536835104e-06, "loss": 0.6054, "step": 8300 }, { "epoch": 0.53, "grad_norm": 1.236090764878588, "learning_rate": 4.735801423646059e-06, "loss": 0.7046, "step": 8301 }, { "epoch": 0.53, "grad_norm": 1.688772663624547, "learning_rate": 4.734766321811714e-06, "loss": 0.7947, "step": 8302 }, { "epoch": 0.53, "grad_norm": 1.503192542202957, "learning_rate": 4.733731231376557e-06, "loss": 0.7427, "step": 8303 }, { "epoch": 0.53, "grad_norm": 2.4462082181228184, "learning_rate": 4.732696152385071e-06, "loss": 0.656, "step": 8304 }, { "epoch": 0.53, "grad_norm": 1.3981141160281538, "learning_rate": 4.731661084881744e-06, "loss": 0.6833, "step": 8305 }, { "epoch": 0.53, "grad_norm": 1.652119835226763, "learning_rate": 4.730626028911058e-06, "loss": 0.7363, "step": 8306 }, { "epoch": 0.53, "grad_norm": 1.6127605313872022, "learning_rate": 4.729590984517498e-06, "loss": 0.6688, "step": 8307 }, { "epoch": 0.53, "grad_norm": 1.5077541251784947, "learning_rate": 4.7285559517455515e-06, "loss": 0.736, "step": 8308 }, { "epoch": 0.53, "grad_norm": 1.6748318875082027, "learning_rate": 4.727520930639698e-06, "loss": 0.7158, "step": 8309 }, { "epoch": 0.53, "grad_norm": 1.4660517937399038, "learning_rate": 4.7264859212444245e-06, "loss": 0.7414, "step": 8310 }, { "epoch": 0.53, "grad_norm": 1.5552248568499518, "learning_rate": 4.7254509236042105e-06, "loss": 0.73, "step": 8311 }, { "epoch": 0.53, "grad_norm": 1.3734444168342999, "learning_rate": 4.72441593776354e-06, "loss": 0.7012, "step": 8312 }, { "epoch": 0.53, "grad_norm": 1.4620177524523867, "learning_rate": 4.723380963766891e-06, "loss": 0.651, "step": 8313 }, { "epoch": 0.53, "grad_norm": 1.5028552101541608, "learning_rate": 4.7223460016587485e-06, "loss": 0.7085, "step": 8314 }, { "epoch": 0.53, "grad_norm": 1.392910667367176, "learning_rate": 4.72131105148359e-06, "loss": 0.6826, "step": 8315 }, { "epoch": 0.53, "grad_norm": 1.6126005002572643, "learning_rate": 4.720276113285897e-06, "loss": 0.5879, "step": 8316 }, { "epoch": 0.53, "grad_norm": 1.4056670902479191, "learning_rate": 4.719241187110152e-06, "loss": 0.6534, "step": 8317 }, { "epoch": 0.53, "grad_norm": 1.4531286495720028, "learning_rate": 4.718206273000827e-06, "loss": 0.7341, "step": 8318 }, { "epoch": 0.53, "grad_norm": 1.2678961441910357, "learning_rate": 4.717171371002407e-06, "loss": 0.6383, "step": 8319 }, { "epoch": 0.53, "grad_norm": 1.0291905751176278, "learning_rate": 4.716136481159366e-06, "loss": 0.679, "step": 8320 }, { "epoch": 0.53, "grad_norm": 1.425578630158898, "learning_rate": 4.715101603516183e-06, "loss": 0.58, "step": 8321 }, { "epoch": 0.53, "grad_norm": 1.569228936968484, "learning_rate": 4.7140667381173324e-06, "loss": 0.7031, "step": 8322 }, { "epoch": 0.53, "grad_norm": 1.4437287752728374, "learning_rate": 4.713031885007296e-06, "loss": 0.7297, "step": 8323 }, { "epoch": 0.53, "grad_norm": 1.2884046017515745, "learning_rate": 4.7119970442305435e-06, "loss": 0.5828, "step": 8324 }, { "epoch": 0.53, "grad_norm": 1.5013263128501366, "learning_rate": 4.710962215831553e-06, "loss": 0.6511, "step": 8325 }, { "epoch": 0.53, "grad_norm": 1.4033261510695658, "learning_rate": 4.709927399854801e-06, "loss": 0.7031, "step": 8326 }, { "epoch": 0.53, "grad_norm": 1.5341616565618954, "learning_rate": 4.708892596344758e-06, "loss": 0.6453, "step": 8327 }, { "epoch": 0.53, "grad_norm": 1.4628200992758227, "learning_rate": 4.7078578053459e-06, "loss": 0.693, "step": 8328 }, { "epoch": 0.53, "grad_norm": 1.6763248875107326, "learning_rate": 4.706823026902699e-06, "loss": 0.6791, "step": 8329 }, { "epoch": 0.53, "grad_norm": 1.4849965279638218, "learning_rate": 4.705788261059631e-06, "loss": 0.7192, "step": 8330 }, { "epoch": 0.53, "grad_norm": 2.195868010919778, "learning_rate": 4.7047535078611626e-06, "loss": 0.678, "step": 8331 }, { "epoch": 0.53, "grad_norm": 1.596934294670937, "learning_rate": 4.70371876735177e-06, "loss": 0.6958, "step": 8332 }, { "epoch": 0.53, "grad_norm": 1.4832555202913082, "learning_rate": 4.70268403957592e-06, "loss": 0.7951, "step": 8333 }, { "epoch": 0.53, "grad_norm": 1.3011214159955458, "learning_rate": 4.701649324578085e-06, "loss": 0.6486, "step": 8334 }, { "epoch": 0.53, "grad_norm": 1.343107976298958, "learning_rate": 4.700614622402737e-06, "loss": 0.6042, "step": 8335 }, { "epoch": 0.53, "grad_norm": 1.4969822997683522, "learning_rate": 4.699579933094343e-06, "loss": 0.6772, "step": 8336 }, { "epoch": 0.53, "grad_norm": 1.2813259099886023, "learning_rate": 4.698545256697373e-06, "loss": 0.6528, "step": 8337 }, { "epoch": 0.53, "grad_norm": 2.1009085138801025, "learning_rate": 4.697510593256292e-06, "loss": 0.7177, "step": 8338 }, { "epoch": 0.53, "grad_norm": 2.0315237463066955, "learning_rate": 4.696475942815573e-06, "loss": 0.5577, "step": 8339 }, { "epoch": 0.53, "grad_norm": 1.6138599900309847, "learning_rate": 4.695441305419678e-06, "loss": 0.6898, "step": 8340 }, { "epoch": 0.53, "grad_norm": 1.7029265890856942, "learning_rate": 4.6944066811130775e-06, "loss": 0.7024, "step": 8341 }, { "epoch": 0.53, "grad_norm": 1.5329471283225342, "learning_rate": 4.693372069940234e-06, "loss": 0.7352, "step": 8342 }, { "epoch": 0.53, "grad_norm": 1.6051621820476025, "learning_rate": 4.692337471945614e-06, "loss": 0.7614, "step": 8343 }, { "epoch": 0.53, "grad_norm": 1.015002467481013, "learning_rate": 4.691302887173685e-06, "loss": 0.6276, "step": 8344 }, { "epoch": 0.53, "grad_norm": 1.7137139624979016, "learning_rate": 4.690268315668908e-06, "loss": 0.7203, "step": 8345 }, { "epoch": 0.53, "grad_norm": 1.5540940164268426, "learning_rate": 4.68923375747575e-06, "loss": 0.7471, "step": 8346 }, { "epoch": 0.53, "grad_norm": 1.8868518835768735, "learning_rate": 4.688199212638669e-06, "loss": 0.6451, "step": 8347 }, { "epoch": 0.53, "grad_norm": 1.517498106792873, "learning_rate": 4.687164681202134e-06, "loss": 0.6668, "step": 8348 }, { "epoch": 0.53, "grad_norm": 1.6638993448098374, "learning_rate": 4.6861301632106005e-06, "loss": 0.7039, "step": 8349 }, { "epoch": 0.53, "grad_norm": 1.4322052797036218, "learning_rate": 4.685095658708535e-06, "loss": 0.6577, "step": 8350 }, { "epoch": 0.53, "grad_norm": 1.4512705797257117, "learning_rate": 4.684061167740395e-06, "loss": 0.6677, "step": 8351 }, { "epoch": 0.53, "grad_norm": 1.4736736477408199, "learning_rate": 4.683026690350642e-06, "loss": 0.6811, "step": 8352 }, { "epoch": 0.53, "grad_norm": 1.710264102236648, "learning_rate": 4.6819922265837385e-06, "loss": 0.6647, "step": 8353 }, { "epoch": 0.53, "grad_norm": 1.1346531980261314, "learning_rate": 4.680957776484138e-06, "loss": 0.7043, "step": 8354 }, { "epoch": 0.53, "grad_norm": 1.347642317972577, "learning_rate": 4.679923340096304e-06, "loss": 0.6112, "step": 8355 }, { "epoch": 0.53, "grad_norm": 1.6250784337014468, "learning_rate": 4.6788889174646905e-06, "loss": 0.6627, "step": 8356 }, { "epoch": 0.53, "grad_norm": 1.4721853038464532, "learning_rate": 4.677854508633757e-06, "loss": 0.7032, "step": 8357 }, { "epoch": 0.53, "grad_norm": 1.5393091058525548, "learning_rate": 4.676820113647959e-06, "loss": 0.5471, "step": 8358 }, { "epoch": 0.54, "grad_norm": 1.073360393523832, "learning_rate": 4.675785732551756e-06, "loss": 0.7135, "step": 8359 }, { "epoch": 0.54, "grad_norm": 1.784548831514462, "learning_rate": 4.674751365389599e-06, "loss": 0.6549, "step": 8360 }, { "epoch": 0.54, "grad_norm": 1.695330226675119, "learning_rate": 4.673717012205945e-06, "loss": 0.8225, "step": 8361 }, { "epoch": 0.54, "grad_norm": 1.5613538394039104, "learning_rate": 4.67268267304525e-06, "loss": 0.7056, "step": 8362 }, { "epoch": 0.54, "grad_norm": 1.7613363988812256, "learning_rate": 4.671648347951963e-06, "loss": 0.7351, "step": 8363 }, { "epoch": 0.54, "grad_norm": 1.5392877475911244, "learning_rate": 4.6706140369705425e-06, "loss": 0.6448, "step": 8364 }, { "epoch": 0.54, "grad_norm": 1.484933503285184, "learning_rate": 4.669579740145436e-06, "loss": 0.6882, "step": 8365 }, { "epoch": 0.54, "grad_norm": 1.4379671746623328, "learning_rate": 4.668545457521101e-06, "loss": 0.6054, "step": 8366 }, { "epoch": 0.54, "grad_norm": 1.5854461364558605, "learning_rate": 4.6675111891419835e-06, "loss": 0.6553, "step": 8367 }, { "epoch": 0.54, "grad_norm": 1.8369514046948068, "learning_rate": 4.666476935052539e-06, "loss": 0.6742, "step": 8368 }, { "epoch": 0.54, "grad_norm": 1.7745554144167972, "learning_rate": 4.665442695297212e-06, "loss": 0.6748, "step": 8369 }, { "epoch": 0.54, "grad_norm": 1.5264830936970453, "learning_rate": 4.664408469920455e-06, "loss": 0.6237, "step": 8370 }, { "epoch": 0.54, "grad_norm": 1.6481504594452794, "learning_rate": 4.663374258966718e-06, "loss": 0.6879, "step": 8371 }, { "epoch": 0.54, "grad_norm": 1.7046877818525263, "learning_rate": 4.662340062480449e-06, "loss": 0.6803, "step": 8372 }, { "epoch": 0.54, "grad_norm": 1.4176389172308816, "learning_rate": 4.661305880506094e-06, "loss": 0.7484, "step": 8373 }, { "epoch": 0.54, "grad_norm": 1.854431343490767, "learning_rate": 4.6602717130880985e-06, "loss": 0.8057, "step": 8374 }, { "epoch": 0.54, "grad_norm": 1.1506519202744045, "learning_rate": 4.659237560270914e-06, "loss": 0.6122, "step": 8375 }, { "epoch": 0.54, "grad_norm": 1.4288703969713972, "learning_rate": 4.6582034220989805e-06, "loss": 0.7472, "step": 8376 }, { "epoch": 0.54, "grad_norm": 1.5401112212785437, "learning_rate": 4.657169298616748e-06, "loss": 0.6483, "step": 8377 }, { "epoch": 0.54, "grad_norm": 1.413649879443614, "learning_rate": 4.6561351898686575e-06, "loss": 0.6717, "step": 8378 }, { "epoch": 0.54, "grad_norm": 1.494892202349833, "learning_rate": 4.655101095899154e-06, "loss": 0.6502, "step": 8379 }, { "epoch": 0.54, "grad_norm": 1.5033928539892392, "learning_rate": 4.65406701675268e-06, "loss": 0.6913, "step": 8380 }, { "epoch": 0.54, "grad_norm": 1.5680188273997395, "learning_rate": 4.653032952473678e-06, "loss": 0.6657, "step": 8381 }, { "epoch": 0.54, "grad_norm": 2.0140912521892784, "learning_rate": 4.651998903106594e-06, "loss": 0.778, "step": 8382 }, { "epoch": 0.54, "grad_norm": 1.3433440118288096, "learning_rate": 4.650964868695863e-06, "loss": 0.6587, "step": 8383 }, { "epoch": 0.54, "grad_norm": 1.4864187581249126, "learning_rate": 4.649930849285931e-06, "loss": 0.6286, "step": 8384 }, { "epoch": 0.54, "grad_norm": 1.42192295095884, "learning_rate": 4.648896844921233e-06, "loss": 0.6507, "step": 8385 }, { "epoch": 0.54, "grad_norm": 1.544210042061762, "learning_rate": 4.647862855646211e-06, "loss": 0.6761, "step": 8386 }, { "epoch": 0.54, "grad_norm": 1.5896210570131954, "learning_rate": 4.6468288815053045e-06, "loss": 0.5734, "step": 8387 }, { "epoch": 0.54, "grad_norm": 1.4848408953905188, "learning_rate": 4.645794922542951e-06, "loss": 0.7031, "step": 8388 }, { "epoch": 0.54, "grad_norm": 1.0814709544831742, "learning_rate": 4.644760978803587e-06, "loss": 0.6295, "step": 8389 }, { "epoch": 0.54, "grad_norm": 1.494266772112585, "learning_rate": 4.643727050331648e-06, "loss": 0.6495, "step": 8390 }, { "epoch": 0.54, "grad_norm": 1.4319522314743183, "learning_rate": 4.642693137171575e-06, "loss": 0.6585, "step": 8391 }, { "epoch": 0.54, "grad_norm": 1.4566930233577422, "learning_rate": 4.641659239367797e-06, "loss": 0.7335, "step": 8392 }, { "epoch": 0.54, "grad_norm": 1.3775497017476213, "learning_rate": 4.640625356964753e-06, "loss": 0.7087, "step": 8393 }, { "epoch": 0.54, "grad_norm": 1.6270201478959814, "learning_rate": 4.639591490006875e-06, "loss": 0.699, "step": 8394 }, { "epoch": 0.54, "grad_norm": 1.5394062367507901, "learning_rate": 4.638557638538601e-06, "loss": 0.7659, "step": 8395 }, { "epoch": 0.54, "grad_norm": 1.524745760406519, "learning_rate": 4.637523802604357e-06, "loss": 0.7143, "step": 8396 }, { "epoch": 0.54, "grad_norm": 1.430490257698266, "learning_rate": 4.63648998224858e-06, "loss": 0.6156, "step": 8397 }, { "epoch": 0.54, "grad_norm": 1.464992188578567, "learning_rate": 4.635456177515698e-06, "loss": 0.6857, "step": 8398 }, { "epoch": 0.54, "grad_norm": 1.4915751450641694, "learning_rate": 4.634422388450144e-06, "loss": 0.7483, "step": 8399 }, { "epoch": 0.54, "grad_norm": 1.589650753198767, "learning_rate": 4.633388615096348e-06, "loss": 0.7487, "step": 8400 }, { "epoch": 0.54, "grad_norm": 1.4622754581135915, "learning_rate": 4.632354857498738e-06, "loss": 0.6034, "step": 8401 }, { "epoch": 0.54, "grad_norm": 1.6774425868325147, "learning_rate": 4.631321115701746e-06, "loss": 0.6955, "step": 8402 }, { "epoch": 0.54, "grad_norm": 1.6334765792915371, "learning_rate": 4.6302873897497955e-06, "loss": 0.6778, "step": 8403 }, { "epoch": 0.54, "grad_norm": 1.118052220776501, "learning_rate": 4.629253679687318e-06, "loss": 0.7282, "step": 8404 }, { "epoch": 0.54, "grad_norm": 1.8132524776319843, "learning_rate": 4.628219985558737e-06, "loss": 0.6925, "step": 8405 }, { "epoch": 0.54, "grad_norm": 1.5632293871008338, "learning_rate": 4.627186307408481e-06, "loss": 0.5871, "step": 8406 }, { "epoch": 0.54, "grad_norm": 1.8740610191816331, "learning_rate": 4.626152645280972e-06, "loss": 0.6345, "step": 8407 }, { "epoch": 0.54, "grad_norm": 1.627192658740916, "learning_rate": 4.625118999220637e-06, "loss": 0.6522, "step": 8408 }, { "epoch": 0.54, "grad_norm": 1.3371055762854047, "learning_rate": 4.624085369271902e-06, "loss": 0.6138, "step": 8409 }, { "epoch": 0.54, "grad_norm": 1.3414301538913187, "learning_rate": 4.6230517554791854e-06, "loss": 0.656, "step": 8410 }, { "epoch": 0.54, "grad_norm": 1.5038580455370996, "learning_rate": 4.622018157886915e-06, "loss": 0.6956, "step": 8411 }, { "epoch": 0.54, "grad_norm": 1.5257222968202566, "learning_rate": 4.620984576539507e-06, "loss": 0.7175, "step": 8412 }, { "epoch": 0.54, "grad_norm": 1.6677764459366227, "learning_rate": 4.6199510114813875e-06, "loss": 0.6549, "step": 8413 }, { "epoch": 0.54, "grad_norm": 1.5192975976129341, "learning_rate": 4.618917462756975e-06, "loss": 0.7831, "step": 8414 }, { "epoch": 0.54, "grad_norm": 1.7401513878792376, "learning_rate": 4.617883930410688e-06, "loss": 0.7819, "step": 8415 }, { "epoch": 0.54, "grad_norm": 1.4798281961073392, "learning_rate": 4.616850414486946e-06, "loss": 0.7083, "step": 8416 }, { "epoch": 0.54, "grad_norm": 1.5335970092198465, "learning_rate": 4.615816915030169e-06, "loss": 0.7134, "step": 8417 }, { "epoch": 0.54, "grad_norm": 1.5027946856052896, "learning_rate": 4.614783432084775e-06, "loss": 0.6711, "step": 8418 }, { "epoch": 0.54, "grad_norm": 1.5719916640131846, "learning_rate": 4.613749965695178e-06, "loss": 0.671, "step": 8419 }, { "epoch": 0.54, "grad_norm": 1.4044965736107407, "learning_rate": 4.612716515905798e-06, "loss": 0.6681, "step": 8420 }, { "epoch": 0.54, "grad_norm": 1.1118654832625756, "learning_rate": 4.611683082761046e-06, "loss": 0.6138, "step": 8421 }, { "epoch": 0.54, "grad_norm": 1.5753129661154965, "learning_rate": 4.61064966630534e-06, "loss": 0.6601, "step": 8422 }, { "epoch": 0.54, "grad_norm": 1.4423716823664974, "learning_rate": 4.609616266583093e-06, "loss": 0.6698, "step": 8423 }, { "epoch": 0.54, "grad_norm": 1.7889309211576039, "learning_rate": 4.608582883638719e-06, "loss": 0.6416, "step": 8424 }, { "epoch": 0.54, "grad_norm": 1.6895848886865907, "learning_rate": 4.607549517516629e-06, "loss": 0.5837, "step": 8425 }, { "epoch": 0.54, "grad_norm": 1.8099690679907223, "learning_rate": 4.606516168261236e-06, "loss": 0.6709, "step": 8426 }, { "epoch": 0.54, "grad_norm": 1.5412952652785223, "learning_rate": 4.605482835916954e-06, "loss": 0.7496, "step": 8427 }, { "epoch": 0.54, "grad_norm": 1.5431058027907154, "learning_rate": 4.604449520528188e-06, "loss": 0.7631, "step": 8428 }, { "epoch": 0.54, "grad_norm": 1.788962134692038, "learning_rate": 4.603416222139352e-06, "loss": 0.6374, "step": 8429 }, { "epoch": 0.54, "grad_norm": 1.069923660917561, "learning_rate": 4.602382940794852e-06, "loss": 0.7198, "step": 8430 }, { "epoch": 0.54, "grad_norm": 1.5816725086357015, "learning_rate": 4.6013496765391e-06, "loss": 0.7416, "step": 8431 }, { "epoch": 0.54, "grad_norm": 1.6092556197212586, "learning_rate": 4.600316429416499e-06, "loss": 0.6028, "step": 8432 }, { "epoch": 0.54, "grad_norm": 1.5195411226244384, "learning_rate": 4.59928319947146e-06, "loss": 0.642, "step": 8433 }, { "epoch": 0.54, "grad_norm": 1.674992102509293, "learning_rate": 4.598249986748384e-06, "loss": 0.6665, "step": 8434 }, { "epoch": 0.54, "grad_norm": 1.5413665985975842, "learning_rate": 4.597216791291681e-06, "loss": 0.6234, "step": 8435 }, { "epoch": 0.54, "grad_norm": 1.8078084864616175, "learning_rate": 4.596183613145754e-06, "loss": 0.7108, "step": 8436 }, { "epoch": 0.54, "grad_norm": 1.453109734412845, "learning_rate": 4.595150452355006e-06, "loss": 0.6632, "step": 8437 }, { "epoch": 0.54, "grad_norm": 1.492691880823338, "learning_rate": 4.594117308963843e-06, "loss": 0.7625, "step": 8438 }, { "epoch": 0.54, "grad_norm": 1.4104553413707894, "learning_rate": 4.593084183016664e-06, "loss": 0.6272, "step": 8439 }, { "epoch": 0.54, "grad_norm": 1.666053092518778, "learning_rate": 4.592051074557873e-06, "loss": 0.7497, "step": 8440 }, { "epoch": 0.54, "grad_norm": 1.2902521384588446, "learning_rate": 4.5910179836318665e-06, "loss": 0.5737, "step": 8441 }, { "epoch": 0.54, "grad_norm": 1.5343552533858575, "learning_rate": 4.589984910283051e-06, "loss": 0.7881, "step": 8442 }, { "epoch": 0.54, "grad_norm": 1.6440045059381312, "learning_rate": 4.58895185455582e-06, "loss": 0.7508, "step": 8443 }, { "epoch": 0.54, "grad_norm": 1.3137954244122525, "learning_rate": 4.587918816494574e-06, "loss": 0.6434, "step": 8444 }, { "epoch": 0.54, "grad_norm": 1.2875193923807287, "learning_rate": 4.586885796143715e-06, "loss": 0.6329, "step": 8445 }, { "epoch": 0.54, "grad_norm": 1.0625684162387414, "learning_rate": 4.585852793547633e-06, "loss": 0.647, "step": 8446 }, { "epoch": 0.54, "grad_norm": 1.4558990426906013, "learning_rate": 4.58481980875073e-06, "loss": 0.6589, "step": 8447 }, { "epoch": 0.54, "grad_norm": 1.5160125322465396, "learning_rate": 4.583786841797398e-06, "loss": 0.6642, "step": 8448 }, { "epoch": 0.54, "grad_norm": 1.534012068440962, "learning_rate": 4.582753892732033e-06, "loss": 0.6712, "step": 8449 }, { "epoch": 0.54, "grad_norm": 1.3567472392061304, "learning_rate": 4.581720961599028e-06, "loss": 0.7285, "step": 8450 }, { "epoch": 0.54, "grad_norm": 1.6009330697320554, "learning_rate": 4.58068804844278e-06, "loss": 0.6519, "step": 8451 }, { "epoch": 0.54, "grad_norm": 1.5905826414971727, "learning_rate": 4.579655153307676e-06, "loss": 0.7258, "step": 8452 }, { "epoch": 0.54, "grad_norm": 1.6135231154429337, "learning_rate": 4.578622276238112e-06, "loss": 0.6404, "step": 8453 }, { "epoch": 0.54, "grad_norm": 1.6328646181348434, "learning_rate": 4.577589417278476e-06, "loss": 0.687, "step": 8454 }, { "epoch": 0.54, "grad_norm": 1.4820272366488918, "learning_rate": 4.576556576473158e-06, "loss": 0.7221, "step": 8455 }, { "epoch": 0.54, "grad_norm": 2.0861048316941964, "learning_rate": 4.575523753866552e-06, "loss": 0.5788, "step": 8456 }, { "epoch": 0.54, "grad_norm": 1.5632208354241175, "learning_rate": 4.57449094950304e-06, "loss": 0.6243, "step": 8457 }, { "epoch": 0.54, "grad_norm": 1.8962930948394245, "learning_rate": 4.573458163427014e-06, "loss": 0.723, "step": 8458 }, { "epoch": 0.54, "grad_norm": 1.4457084168128451, "learning_rate": 4.572425395682859e-06, "loss": 0.6093, "step": 8459 }, { "epoch": 0.54, "grad_norm": 2.283693754598053, "learning_rate": 4.571392646314965e-06, "loss": 0.7457, "step": 8460 }, { "epoch": 0.54, "grad_norm": 1.9538727692089768, "learning_rate": 4.570359915367711e-06, "loss": 0.7452, "step": 8461 }, { "epoch": 0.54, "grad_norm": 1.4840496310361404, "learning_rate": 4.569327202885488e-06, "loss": 0.7792, "step": 8462 }, { "epoch": 0.54, "grad_norm": 1.433343464225676, "learning_rate": 4.568294508912674e-06, "loss": 0.6628, "step": 8463 }, { "epoch": 0.54, "grad_norm": 1.3483936188624464, "learning_rate": 4.567261833493655e-06, "loss": 0.5645, "step": 8464 }, { "epoch": 0.54, "grad_norm": 1.7158357770637254, "learning_rate": 4.566229176672815e-06, "loss": 0.728, "step": 8465 }, { "epoch": 0.54, "grad_norm": 1.2933955460083197, "learning_rate": 4.565196538494532e-06, "loss": 0.6506, "step": 8466 }, { "epoch": 0.54, "grad_norm": 1.5218622772871664, "learning_rate": 4.5641639190031894e-06, "loss": 0.5413, "step": 8467 }, { "epoch": 0.54, "grad_norm": 1.4353029312026235, "learning_rate": 4.563131318243164e-06, "loss": 0.7414, "step": 8468 }, { "epoch": 0.54, "grad_norm": 1.4983896271288957, "learning_rate": 4.56209873625884e-06, "loss": 0.7233, "step": 8469 }, { "epoch": 0.54, "grad_norm": 1.5724373688387394, "learning_rate": 4.5610661730945884e-06, "loss": 0.6762, "step": 8470 }, { "epoch": 0.54, "grad_norm": 1.4415364358517015, "learning_rate": 4.560033628794792e-06, "loss": 0.6868, "step": 8471 }, { "epoch": 0.54, "grad_norm": 1.7386979914774905, "learning_rate": 4.559001103403825e-06, "loss": 0.6649, "step": 8472 }, { "epoch": 0.54, "grad_norm": 1.5664644280135693, "learning_rate": 4.557968596966063e-06, "loss": 0.7797, "step": 8473 }, { "epoch": 0.54, "grad_norm": 1.5997957976055794, "learning_rate": 4.5569361095258854e-06, "loss": 0.7201, "step": 8474 }, { "epoch": 0.54, "grad_norm": 1.0352141772462922, "learning_rate": 4.5559036411276596e-06, "loss": 0.7007, "step": 8475 }, { "epoch": 0.54, "grad_norm": 1.6005564130979226, "learning_rate": 4.554871191815764e-06, "loss": 0.7063, "step": 8476 }, { "epoch": 0.54, "grad_norm": 1.663361332776807, "learning_rate": 4.553838761634569e-06, "loss": 0.6834, "step": 8477 }, { "epoch": 0.54, "grad_norm": 1.3486536347254088, "learning_rate": 4.552806350628446e-06, "loss": 0.7098, "step": 8478 }, { "epoch": 0.54, "grad_norm": 1.6226573751789939, "learning_rate": 4.551773958841765e-06, "loss": 0.7516, "step": 8479 }, { "epoch": 0.54, "grad_norm": 1.547303805718704, "learning_rate": 4.5507415863189005e-06, "loss": 0.6184, "step": 8480 }, { "epoch": 0.54, "grad_norm": 1.5670832431516266, "learning_rate": 4.549709233104216e-06, "loss": 0.6995, "step": 8481 }, { "epoch": 0.54, "grad_norm": 1.752366569582288, "learning_rate": 4.548676899242082e-06, "loss": 0.7077, "step": 8482 }, { "epoch": 0.54, "grad_norm": 1.0868752272329216, "learning_rate": 4.54764458477687e-06, "loss": 0.6748, "step": 8483 }, { "epoch": 0.54, "grad_norm": 1.3859897562598968, "learning_rate": 4.546612289752939e-06, "loss": 0.6715, "step": 8484 }, { "epoch": 0.54, "grad_norm": 1.4860374987370246, "learning_rate": 4.5455800142146626e-06, "loss": 0.6486, "step": 8485 }, { "epoch": 0.54, "grad_norm": 1.4767225814211347, "learning_rate": 4.5445477582064e-06, "loss": 0.6233, "step": 8486 }, { "epoch": 0.54, "grad_norm": 1.5540995964324489, "learning_rate": 4.54351552177252e-06, "loss": 0.6511, "step": 8487 }, { "epoch": 0.54, "grad_norm": 1.393940366021886, "learning_rate": 4.542483304957381e-06, "loss": 0.6176, "step": 8488 }, { "epoch": 0.54, "grad_norm": 1.5169012093077938, "learning_rate": 4.541451107805351e-06, "loss": 0.6908, "step": 8489 }, { "epoch": 0.54, "grad_norm": 1.3120427928078924, "learning_rate": 4.540418930360786e-06, "loss": 0.618, "step": 8490 }, { "epoch": 0.54, "grad_norm": 1.4609778039811252, "learning_rate": 4.53938677266805e-06, "loss": 0.6565, "step": 8491 }, { "epoch": 0.54, "grad_norm": 1.5065371030945265, "learning_rate": 4.5383546347715056e-06, "loss": 0.6047, "step": 8492 }, { "epoch": 0.54, "grad_norm": 1.028761361083226, "learning_rate": 4.537322516715505e-06, "loss": 0.5885, "step": 8493 }, { "epoch": 0.54, "grad_norm": 1.4987526281355676, "learning_rate": 4.5362904185444125e-06, "loss": 0.6422, "step": 8494 }, { "epoch": 0.54, "grad_norm": 1.4777277678308922, "learning_rate": 4.535258340302583e-06, "loss": 0.6589, "step": 8495 }, { "epoch": 0.54, "grad_norm": 1.5883320491570994, "learning_rate": 4.534226282034375e-06, "loss": 0.6229, "step": 8496 }, { "epoch": 0.54, "grad_norm": 1.4348916387963893, "learning_rate": 4.53319424378414e-06, "loss": 0.6684, "step": 8497 }, { "epoch": 0.54, "grad_norm": 1.630496220944128, "learning_rate": 4.532162225596239e-06, "loss": 0.6458, "step": 8498 }, { "epoch": 0.54, "grad_norm": 1.5931610543982093, "learning_rate": 4.531130227515019e-06, "loss": 0.7334, "step": 8499 }, { "epoch": 0.54, "grad_norm": 1.4738258502232264, "learning_rate": 4.530098249584838e-06, "loss": 0.7643, "step": 8500 }, { "epoch": 0.54, "grad_norm": 1.3449054549001938, "learning_rate": 4.529066291850047e-06, "loss": 0.5935, "step": 8501 }, { "epoch": 0.54, "grad_norm": 1.407602500538279, "learning_rate": 4.528034354354996e-06, "loss": 0.5953, "step": 8502 }, { "epoch": 0.54, "grad_norm": 1.4741433787236415, "learning_rate": 4.527002437144039e-06, "loss": 0.6977, "step": 8503 }, { "epoch": 0.54, "grad_norm": 1.6130797931949423, "learning_rate": 4.525970540261521e-06, "loss": 0.6803, "step": 8504 }, { "epoch": 0.54, "grad_norm": 1.5047813959116214, "learning_rate": 4.524938663751796e-06, "loss": 0.6891, "step": 8505 }, { "epoch": 0.54, "grad_norm": 1.831283588462826, "learning_rate": 4.523906807659206e-06, "loss": 0.6822, "step": 8506 }, { "epoch": 0.54, "grad_norm": 1.7719520518414884, "learning_rate": 4.522874972028103e-06, "loss": 0.7908, "step": 8507 }, { "epoch": 0.54, "grad_norm": 1.7249994451771333, "learning_rate": 4.521843156902829e-06, "loss": 0.6807, "step": 8508 }, { "epoch": 0.54, "grad_norm": 1.4282026569639588, "learning_rate": 4.520811362327732e-06, "loss": 0.6719, "step": 8509 }, { "epoch": 0.54, "grad_norm": 1.314697246655624, "learning_rate": 4.519779588347158e-06, "loss": 0.7577, "step": 8510 }, { "epoch": 0.54, "grad_norm": 1.4310587329728244, "learning_rate": 4.518747835005445e-06, "loss": 0.6917, "step": 8511 }, { "epoch": 0.54, "grad_norm": 0.9920288749989635, "learning_rate": 4.5177161023469414e-06, "loss": 0.6079, "step": 8512 }, { "epoch": 0.54, "grad_norm": 1.5995968042092308, "learning_rate": 4.516684390415985e-06, "loss": 0.7177, "step": 8513 }, { "epoch": 0.54, "grad_norm": 1.5248092490953762, "learning_rate": 4.515652699256918e-06, "loss": 0.6411, "step": 8514 }, { "epoch": 0.55, "grad_norm": 1.4885901111542423, "learning_rate": 4.51462102891408e-06, "loss": 0.6289, "step": 8515 }, { "epoch": 0.55, "grad_norm": 1.9057136933018488, "learning_rate": 4.513589379431812e-06, "loss": 0.6874, "step": 8516 }, { "epoch": 0.55, "grad_norm": 1.441858763624294, "learning_rate": 4.512557750854448e-06, "loss": 0.6104, "step": 8517 }, { "epoch": 0.55, "grad_norm": 1.7529849729088895, "learning_rate": 4.511526143226328e-06, "loss": 0.7919, "step": 8518 }, { "epoch": 0.55, "grad_norm": 1.619333162213, "learning_rate": 4.51049455659179e-06, "loss": 0.6885, "step": 8519 }, { "epoch": 0.55, "grad_norm": 1.5954982333986665, "learning_rate": 4.509462990995165e-06, "loss": 0.6857, "step": 8520 }, { "epoch": 0.55, "grad_norm": 1.5740010441480397, "learning_rate": 4.508431446480792e-06, "loss": 0.6444, "step": 8521 }, { "epoch": 0.55, "grad_norm": 1.403468336478303, "learning_rate": 4.507399923093e-06, "loss": 0.6518, "step": 8522 }, { "epoch": 0.55, "grad_norm": 1.6022748271856087, "learning_rate": 4.506368420876127e-06, "loss": 0.6688, "step": 8523 }, { "epoch": 0.55, "grad_norm": 1.6846043861872404, "learning_rate": 4.5053369398745e-06, "loss": 0.566, "step": 8524 }, { "epoch": 0.55, "grad_norm": 1.4240600269789796, "learning_rate": 4.504305480132454e-06, "loss": 0.6139, "step": 8525 }, { "epoch": 0.55, "grad_norm": 1.5040467229450671, "learning_rate": 4.5032740416943145e-06, "loss": 0.7326, "step": 8526 }, { "epoch": 0.55, "grad_norm": 1.7206750133608706, "learning_rate": 4.502242624604413e-06, "loss": 0.7912, "step": 8527 }, { "epoch": 0.55, "grad_norm": 1.5876884454789673, "learning_rate": 4.501211228907078e-06, "loss": 0.7509, "step": 8528 }, { "epoch": 0.55, "grad_norm": 1.5767834194798118, "learning_rate": 4.5001798546466365e-06, "loss": 0.7603, "step": 8529 }, { "epoch": 0.55, "grad_norm": 1.5646786717873404, "learning_rate": 4.4991485018674154e-06, "loss": 0.6872, "step": 8530 }, { "epoch": 0.55, "grad_norm": 1.6004374736107885, "learning_rate": 4.498117170613738e-06, "loss": 0.6796, "step": 8531 }, { "epoch": 0.55, "grad_norm": 1.506065622813923, "learning_rate": 4.497085860929932e-06, "loss": 0.615, "step": 8532 }, { "epoch": 0.55, "grad_norm": 1.4791553602043837, "learning_rate": 4.496054572860317e-06, "loss": 0.7369, "step": 8533 }, { "epoch": 0.55, "grad_norm": 1.4512061536968646, "learning_rate": 4.495023306449219e-06, "loss": 0.6272, "step": 8534 }, { "epoch": 0.55, "grad_norm": 1.3975694176502784, "learning_rate": 4.493992061740956e-06, "loss": 0.6306, "step": 8535 }, { "epoch": 0.55, "grad_norm": 1.5903374121887577, "learning_rate": 4.492960838779853e-06, "loss": 0.7743, "step": 8536 }, { "epoch": 0.55, "grad_norm": 1.4758545182995064, "learning_rate": 4.491929637610225e-06, "loss": 0.6798, "step": 8537 }, { "epoch": 0.55, "grad_norm": 1.4885047198769592, "learning_rate": 4.490898458276395e-06, "loss": 0.6169, "step": 8538 }, { "epoch": 0.55, "grad_norm": 1.6115383515540518, "learning_rate": 4.489867300822681e-06, "loss": 0.7539, "step": 8539 }, { "epoch": 0.55, "grad_norm": 1.1493792754360581, "learning_rate": 4.488836165293397e-06, "loss": 0.6895, "step": 8540 }, { "epoch": 0.55, "grad_norm": 1.6063189667135205, "learning_rate": 4.4878050517328625e-06, "loss": 0.6801, "step": 8541 }, { "epoch": 0.55, "grad_norm": 1.7961599224293594, "learning_rate": 4.486773960185388e-06, "loss": 0.7289, "step": 8542 }, { "epoch": 0.55, "grad_norm": 1.6563153306442497, "learning_rate": 4.485742890695292e-06, "loss": 0.6278, "step": 8543 }, { "epoch": 0.55, "grad_norm": 1.2256796092944224, "learning_rate": 4.484711843306884e-06, "loss": 0.6287, "step": 8544 }, { "epoch": 0.55, "grad_norm": 1.3381489975124221, "learning_rate": 4.483680818064481e-06, "loss": 0.723, "step": 8545 }, { "epoch": 0.55, "grad_norm": 1.6540218904255064, "learning_rate": 4.482649815012389e-06, "loss": 0.6813, "step": 8546 }, { "epoch": 0.55, "grad_norm": 1.7335758002706783, "learning_rate": 4.481618834194921e-06, "loss": 0.6493, "step": 8547 }, { "epoch": 0.55, "grad_norm": 1.6036276586914506, "learning_rate": 4.480587875656388e-06, "loss": 0.698, "step": 8548 }, { "epoch": 0.55, "grad_norm": 1.5245084171455754, "learning_rate": 4.479556939441095e-06, "loss": 0.6581, "step": 8549 }, { "epoch": 0.55, "grad_norm": 1.5576869252892493, "learning_rate": 4.478526025593352e-06, "loss": 0.7391, "step": 8550 }, { "epoch": 0.55, "grad_norm": 1.218470515274893, "learning_rate": 4.477495134157464e-06, "loss": 0.818, "step": 8551 }, { "epoch": 0.55, "grad_norm": 1.4623101696466565, "learning_rate": 4.4764642651777385e-06, "loss": 0.7073, "step": 8552 }, { "epoch": 0.55, "grad_norm": 1.4836029133259807, "learning_rate": 4.475433418698477e-06, "loss": 0.7035, "step": 8553 }, { "epoch": 0.55, "grad_norm": 2.2622011837406695, "learning_rate": 4.474402594763987e-06, "loss": 0.6849, "step": 8554 }, { "epoch": 0.55, "grad_norm": 1.6582558955171365, "learning_rate": 4.473371793418567e-06, "loss": 0.6722, "step": 8555 }, { "epoch": 0.55, "grad_norm": 1.5603345232648764, "learning_rate": 4.4723410147065215e-06, "loss": 0.6434, "step": 8556 }, { "epoch": 0.55, "grad_norm": 1.0437392466280366, "learning_rate": 4.471310258672151e-06, "loss": 0.647, "step": 8557 }, { "epoch": 0.55, "grad_norm": 1.4605944392831738, "learning_rate": 4.470279525359753e-06, "loss": 0.7396, "step": 8558 }, { "epoch": 0.55, "grad_norm": 1.4266940798150585, "learning_rate": 4.469248814813631e-06, "loss": 0.5983, "step": 8559 }, { "epoch": 0.55, "grad_norm": 1.5377895596477058, "learning_rate": 4.468218127078076e-06, "loss": 0.7684, "step": 8560 }, { "epoch": 0.55, "grad_norm": 1.4526259861923905, "learning_rate": 4.467187462197392e-06, "loss": 0.5675, "step": 8561 }, { "epoch": 0.55, "grad_norm": 1.5374769649101274, "learning_rate": 4.466156820215868e-06, "loss": 0.6916, "step": 8562 }, { "epoch": 0.55, "grad_norm": 3.4345765933877197, "learning_rate": 4.465126201177804e-06, "loss": 0.6285, "step": 8563 }, { "epoch": 0.55, "grad_norm": 1.5928743970669916, "learning_rate": 4.464095605127491e-06, "loss": 0.6247, "step": 8564 }, { "epoch": 0.55, "grad_norm": 4.139884970403183, "learning_rate": 4.463065032109224e-06, "loss": 0.6533, "step": 8565 }, { "epoch": 0.55, "grad_norm": 1.4728181300077021, "learning_rate": 4.462034482167293e-06, "loss": 0.6447, "step": 8566 }, { "epoch": 0.55, "grad_norm": 1.4121383250809403, "learning_rate": 4.461003955345989e-06, "loss": 0.5518, "step": 8567 }, { "epoch": 0.55, "grad_norm": 1.6200492605920493, "learning_rate": 4.459973451689604e-06, "loss": 0.7738, "step": 8568 }, { "epoch": 0.55, "grad_norm": 1.4385728846435912, "learning_rate": 4.458942971242423e-06, "loss": 0.7474, "step": 8569 }, { "epoch": 0.55, "grad_norm": 1.4595688560383817, "learning_rate": 4.457912514048739e-06, "loss": 0.5889, "step": 8570 }, { "epoch": 0.55, "grad_norm": 1.4161521254735174, "learning_rate": 4.456882080152834e-06, "loss": 0.6439, "step": 8571 }, { "epoch": 0.55, "grad_norm": 1.4616502806359595, "learning_rate": 4.455851669598997e-06, "loss": 0.6895, "step": 8572 }, { "epoch": 0.55, "grad_norm": 1.5489053128264165, "learning_rate": 4.454821282431511e-06, "loss": 0.6823, "step": 8573 }, { "epoch": 0.55, "grad_norm": 1.4041027709308709, "learning_rate": 4.45379091869466e-06, "loss": 0.6086, "step": 8574 }, { "epoch": 0.55, "grad_norm": 1.657057131574007, "learning_rate": 4.4527605784327295e-06, "loss": 0.7017, "step": 8575 }, { "epoch": 0.55, "grad_norm": 1.574697075802247, "learning_rate": 4.451730261689998e-06, "loss": 0.7129, "step": 8576 }, { "epoch": 0.55, "grad_norm": 1.6557291560945195, "learning_rate": 4.450699968510749e-06, "loss": 0.6002, "step": 8577 }, { "epoch": 0.55, "grad_norm": 1.3035615589937757, "learning_rate": 4.44966969893926e-06, "loss": 0.6319, "step": 8578 }, { "epoch": 0.55, "grad_norm": 1.539565609576843, "learning_rate": 4.448639453019812e-06, "loss": 0.771, "step": 8579 }, { "epoch": 0.55, "grad_norm": 1.433617960922979, "learning_rate": 4.4476092307966805e-06, "loss": 0.7056, "step": 8580 }, { "epoch": 0.55, "grad_norm": 1.6744254354700896, "learning_rate": 4.446579032314145e-06, "loss": 0.6125, "step": 8581 }, { "epoch": 0.55, "grad_norm": 1.7747884471233104, "learning_rate": 4.445548857616478e-06, "loss": 0.7255, "step": 8582 }, { "epoch": 0.55, "grad_norm": 1.2127606597864695, "learning_rate": 4.444518706747956e-06, "loss": 0.6995, "step": 8583 }, { "epoch": 0.55, "grad_norm": 1.5127488055947407, "learning_rate": 4.443488579752855e-06, "loss": 0.7893, "step": 8584 }, { "epoch": 0.55, "grad_norm": 1.5509594724347544, "learning_rate": 4.442458476675443e-06, "loss": 0.6761, "step": 8585 }, { "epoch": 0.55, "grad_norm": 1.5684528860126286, "learning_rate": 4.441428397559996e-06, "loss": 0.7343, "step": 8586 }, { "epoch": 0.55, "grad_norm": 1.3679215909114484, "learning_rate": 4.440398342450782e-06, "loss": 0.6758, "step": 8587 }, { "epoch": 0.55, "grad_norm": 1.8017902092942557, "learning_rate": 4.439368311392071e-06, "loss": 0.6605, "step": 8588 }, { "epoch": 0.55, "grad_norm": 1.402760913109007, "learning_rate": 4.438338304428132e-06, "loss": 0.771, "step": 8589 }, { "epoch": 0.55, "grad_norm": 1.4423359163854352, "learning_rate": 4.437308321603234e-06, "loss": 0.7122, "step": 8590 }, { "epoch": 0.55, "grad_norm": 1.0803183392039766, "learning_rate": 4.43627836296164e-06, "loss": 0.6354, "step": 8591 }, { "epoch": 0.55, "grad_norm": 1.7156566508382267, "learning_rate": 4.435248428547618e-06, "loss": 0.6918, "step": 8592 }, { "epoch": 0.55, "grad_norm": 1.531721902613555, "learning_rate": 4.434218518405432e-06, "loss": 0.7177, "step": 8593 }, { "epoch": 0.55, "grad_norm": 1.0993810015830996, "learning_rate": 4.433188632579344e-06, "loss": 0.6118, "step": 8594 }, { "epoch": 0.55, "grad_norm": 1.3578375617218164, "learning_rate": 4.43215877111362e-06, "loss": 0.5748, "step": 8595 }, { "epoch": 0.55, "grad_norm": 1.5017784958216096, "learning_rate": 4.431128934052517e-06, "loss": 0.7131, "step": 8596 }, { "epoch": 0.55, "grad_norm": 1.5640928958717044, "learning_rate": 4.4300991214402986e-06, "loss": 0.6988, "step": 8597 }, { "epoch": 0.55, "grad_norm": 1.4775511942356905, "learning_rate": 4.429069333321221e-06, "loss": 0.6835, "step": 8598 }, { "epoch": 0.55, "grad_norm": 1.5345040419429816, "learning_rate": 4.428039569739544e-06, "loss": 0.6367, "step": 8599 }, { "epoch": 0.55, "grad_norm": 1.3714629091929902, "learning_rate": 4.427009830739524e-06, "loss": 0.6437, "step": 8600 }, { "epoch": 0.55, "grad_norm": 1.56819619822323, "learning_rate": 4.425980116365419e-06, "loss": 0.7021, "step": 8601 }, { "epoch": 0.55, "grad_norm": 1.471756468977831, "learning_rate": 4.4249504266614814e-06, "loss": 0.7057, "step": 8602 }, { "epoch": 0.55, "grad_norm": 1.8123862656454521, "learning_rate": 4.423920761671965e-06, "loss": 0.6754, "step": 8603 }, { "epoch": 0.55, "grad_norm": 1.5109602223713872, "learning_rate": 4.422891121441126e-06, "loss": 0.6841, "step": 8604 }, { "epoch": 0.55, "grad_norm": 1.6019691025608822, "learning_rate": 4.421861506013213e-06, "loss": 0.6359, "step": 8605 }, { "epoch": 0.55, "grad_norm": 1.4850710696564766, "learning_rate": 4.420831915432477e-06, "loss": 0.7184, "step": 8606 }, { "epoch": 0.55, "grad_norm": 1.469813529920101, "learning_rate": 4.419802349743169e-06, "loss": 0.6317, "step": 8607 }, { "epoch": 0.55, "grad_norm": 1.5043098434524018, "learning_rate": 4.418772808989537e-06, "loss": 0.693, "step": 8608 }, { "epoch": 0.55, "grad_norm": 1.6007512987003323, "learning_rate": 4.417743293215827e-06, "loss": 0.6705, "step": 8609 }, { "epoch": 0.55, "grad_norm": 1.5654300562147607, "learning_rate": 4.416713802466289e-06, "loss": 0.6922, "step": 8610 }, { "epoch": 0.55, "grad_norm": 1.6541403137985173, "learning_rate": 4.415684336785165e-06, "loss": 0.6714, "step": 8611 }, { "epoch": 0.55, "grad_norm": 1.4256219096520475, "learning_rate": 4.414654896216699e-06, "loss": 0.7377, "step": 8612 }, { "epoch": 0.55, "grad_norm": 1.5393639762100588, "learning_rate": 4.413625480805138e-06, "loss": 0.7159, "step": 8613 }, { "epoch": 0.55, "grad_norm": 1.6802511757005467, "learning_rate": 4.4125960905947195e-06, "loss": 0.6862, "step": 8614 }, { "epoch": 0.55, "grad_norm": 1.5922020793351674, "learning_rate": 4.411566725629688e-06, "loss": 0.7378, "step": 8615 }, { "epoch": 0.55, "grad_norm": 1.4931688166275312, "learning_rate": 4.410537385954282e-06, "loss": 0.6325, "step": 8616 }, { "epoch": 0.55, "grad_norm": 1.6830039557985892, "learning_rate": 4.409508071612741e-06, "loss": 0.6852, "step": 8617 }, { "epoch": 0.55, "grad_norm": 1.5958803908298218, "learning_rate": 4.408478782649301e-06, "loss": 0.7382, "step": 8618 }, { "epoch": 0.55, "grad_norm": 1.1057476643117, "learning_rate": 4.407449519108203e-06, "loss": 0.7803, "step": 8619 }, { "epoch": 0.55, "grad_norm": 1.7746604969654527, "learning_rate": 4.4064202810336755e-06, "loss": 0.7353, "step": 8620 }, { "epoch": 0.55, "grad_norm": 1.8103383261742771, "learning_rate": 4.405391068469958e-06, "loss": 0.681, "step": 8621 }, { "epoch": 0.55, "grad_norm": 1.6087477808596593, "learning_rate": 4.404361881461285e-06, "loss": 0.7103, "step": 8622 }, { "epoch": 0.55, "grad_norm": 1.4767769339499466, "learning_rate": 4.403332720051884e-06, "loss": 0.7173, "step": 8623 }, { "epoch": 0.55, "grad_norm": 1.4720761498144674, "learning_rate": 4.402303584285991e-06, "loss": 0.6742, "step": 8624 }, { "epoch": 0.55, "grad_norm": 1.5777671143366852, "learning_rate": 4.401274474207833e-06, "loss": 0.6872, "step": 8625 }, { "epoch": 0.55, "grad_norm": 1.5547934043122036, "learning_rate": 4.400245389861642e-06, "loss": 0.7097, "step": 8626 }, { "epoch": 0.55, "grad_norm": 1.6002736965064621, "learning_rate": 4.3992163312916404e-06, "loss": 0.6775, "step": 8627 }, { "epoch": 0.55, "grad_norm": 1.7706994851294768, "learning_rate": 4.398187298542061e-06, "loss": 0.7488, "step": 8628 }, { "epoch": 0.55, "grad_norm": 1.5889677828938467, "learning_rate": 4.397158291657125e-06, "loss": 0.6677, "step": 8629 }, { "epoch": 0.55, "grad_norm": 1.4442151498120293, "learning_rate": 4.39612931068106e-06, "loss": 0.6959, "step": 8630 }, { "epoch": 0.55, "grad_norm": 1.445023978096313, "learning_rate": 4.39510035565809e-06, "loss": 0.7084, "step": 8631 }, { "epoch": 0.55, "grad_norm": 1.68459687737365, "learning_rate": 4.394071426632433e-06, "loss": 0.7644, "step": 8632 }, { "epoch": 0.55, "grad_norm": 1.4419418117455158, "learning_rate": 4.393042523648315e-06, "loss": 0.644, "step": 8633 }, { "epoch": 0.55, "grad_norm": 1.6074125424058239, "learning_rate": 4.392013646749952e-06, "loss": 0.7387, "step": 8634 }, { "epoch": 0.55, "grad_norm": 1.3960250507802927, "learning_rate": 4.390984795981566e-06, "loss": 0.6829, "step": 8635 }, { "epoch": 0.55, "grad_norm": 1.684911084033809, "learning_rate": 4.389955971387373e-06, "loss": 0.7314, "step": 8636 }, { "epoch": 0.55, "grad_norm": 1.5633270164459057, "learning_rate": 4.388927173011592e-06, "loss": 0.8336, "step": 8637 }, { "epoch": 0.55, "grad_norm": 1.5626136320820747, "learning_rate": 4.387898400898436e-06, "loss": 0.677, "step": 8638 }, { "epoch": 0.55, "grad_norm": 1.5734007797506784, "learning_rate": 4.38686965509212e-06, "loss": 0.6825, "step": 8639 }, { "epoch": 0.55, "grad_norm": 1.0417828739120545, "learning_rate": 4.385840935636859e-06, "loss": 0.682, "step": 8640 }, { "epoch": 0.55, "grad_norm": 1.4635400700211256, "learning_rate": 4.384812242576863e-06, "loss": 0.6325, "step": 8641 }, { "epoch": 0.55, "grad_norm": 1.5588866249764712, "learning_rate": 4.383783575956345e-06, "loss": 0.6034, "step": 8642 }, { "epoch": 0.55, "grad_norm": 1.9983929673436305, "learning_rate": 4.382754935819514e-06, "loss": 0.6633, "step": 8643 }, { "epoch": 0.55, "grad_norm": 1.110382130721127, "learning_rate": 4.381726322210579e-06, "loss": 0.6682, "step": 8644 }, { "epoch": 0.55, "grad_norm": 1.5001342447283301, "learning_rate": 4.380697735173745e-06, "loss": 0.7566, "step": 8645 }, { "epoch": 0.55, "grad_norm": 1.1071456923631962, "learning_rate": 4.379669174753226e-06, "loss": 0.6936, "step": 8646 }, { "epoch": 0.55, "grad_norm": 1.6560153406498161, "learning_rate": 4.378640640993218e-06, "loss": 0.6754, "step": 8647 }, { "epoch": 0.55, "grad_norm": 1.3460705903888286, "learning_rate": 4.37761213393793e-06, "loss": 0.6069, "step": 8648 }, { "epoch": 0.55, "grad_norm": 1.5453876268508069, "learning_rate": 4.376583653631567e-06, "loss": 0.6469, "step": 8649 }, { "epoch": 0.55, "grad_norm": 1.613919947357359, "learning_rate": 4.3755552001183265e-06, "loss": 0.707, "step": 8650 }, { "epoch": 0.55, "grad_norm": 1.5131091310240379, "learning_rate": 4.374526773442413e-06, "loss": 0.6508, "step": 8651 }, { "epoch": 0.55, "grad_norm": 1.5467544389583818, "learning_rate": 4.373498373648022e-06, "loss": 0.7204, "step": 8652 }, { "epoch": 0.55, "grad_norm": 1.5054571130881256, "learning_rate": 4.372470000779357e-06, "loss": 0.6837, "step": 8653 }, { "epoch": 0.55, "grad_norm": 1.1755680747011628, "learning_rate": 4.371441654880612e-06, "loss": 0.697, "step": 8654 }, { "epoch": 0.55, "grad_norm": 1.4482376943061093, "learning_rate": 4.370413335995985e-06, "loss": 0.7368, "step": 8655 }, { "epoch": 0.55, "grad_norm": 1.5241736282796177, "learning_rate": 4.369385044169667e-06, "loss": 0.6939, "step": 8656 }, { "epoch": 0.55, "grad_norm": 1.401754763620807, "learning_rate": 4.368356779445856e-06, "loss": 0.7409, "step": 8657 }, { "epoch": 0.55, "grad_norm": 1.6418386678729564, "learning_rate": 4.367328541868744e-06, "loss": 0.6091, "step": 8658 }, { "epoch": 0.55, "grad_norm": 1.289888476700932, "learning_rate": 4.36630033148252e-06, "loss": 0.6513, "step": 8659 }, { "epoch": 0.55, "grad_norm": 1.4443531291042486, "learning_rate": 4.36527214833138e-06, "loss": 0.7048, "step": 8660 }, { "epoch": 0.55, "grad_norm": 1.4496511081350767, "learning_rate": 4.364243992459506e-06, "loss": 0.6691, "step": 8661 }, { "epoch": 0.55, "grad_norm": 1.5604419421778417, "learning_rate": 4.363215863911091e-06, "loss": 0.6451, "step": 8662 }, { "epoch": 0.55, "grad_norm": 1.6891963315934049, "learning_rate": 4.362187762730319e-06, "loss": 0.6528, "step": 8663 }, { "epoch": 0.55, "grad_norm": 1.712438494177452, "learning_rate": 4.3611596889613775e-06, "loss": 0.7675, "step": 8664 }, { "epoch": 0.55, "grad_norm": 1.4179907941708718, "learning_rate": 4.360131642648449e-06, "loss": 0.7343, "step": 8665 }, { "epoch": 0.55, "grad_norm": 1.641473346958274, "learning_rate": 4.359103623835718e-06, "loss": 0.6735, "step": 8666 }, { "epoch": 0.55, "grad_norm": 1.4931012740783929, "learning_rate": 4.358075632567368e-06, "loss": 0.6578, "step": 8667 }, { "epoch": 0.55, "grad_norm": 1.4631891193801292, "learning_rate": 4.357047668887577e-06, "loss": 0.676, "step": 8668 }, { "epoch": 0.55, "grad_norm": 1.3847135561349502, "learning_rate": 4.356019732840528e-06, "loss": 0.659, "step": 8669 }, { "epoch": 0.55, "grad_norm": 1.5568058979762716, "learning_rate": 4.354991824470396e-06, "loss": 0.7197, "step": 8670 }, { "epoch": 0.55, "grad_norm": 1.774561730740675, "learning_rate": 4.3539639438213606e-06, "loss": 0.6434, "step": 8671 }, { "epoch": 0.56, "grad_norm": 1.4259927116112394, "learning_rate": 4.3529360909375966e-06, "loss": 0.6292, "step": 8672 }, { "epoch": 0.56, "grad_norm": 1.4319014635951015, "learning_rate": 4.351908265863282e-06, "loss": 0.7364, "step": 8673 }, { "epoch": 0.56, "grad_norm": 1.410376532239805, "learning_rate": 4.350880468642586e-06, "loss": 0.6361, "step": 8674 }, { "epoch": 0.56, "grad_norm": 1.5330460659066114, "learning_rate": 4.349852699319686e-06, "loss": 0.7007, "step": 8675 }, { "epoch": 0.56, "grad_norm": 1.9023951747107712, "learning_rate": 4.3488249579387475e-06, "loss": 0.6938, "step": 8676 }, { "epoch": 0.56, "grad_norm": 1.650955255770472, "learning_rate": 4.347797244543945e-06, "loss": 0.7168, "step": 8677 }, { "epoch": 0.56, "grad_norm": 1.6089067586365051, "learning_rate": 4.346769559179447e-06, "loss": 0.715, "step": 8678 }, { "epoch": 0.56, "grad_norm": 1.5645177986728087, "learning_rate": 4.34574190188942e-06, "loss": 0.6546, "step": 8679 }, { "epoch": 0.56, "grad_norm": 2.318250476082708, "learning_rate": 4.344714272718033e-06, "loss": 0.6291, "step": 8680 }, { "epoch": 0.56, "grad_norm": 1.474914100065584, "learning_rate": 4.343686671709449e-06, "loss": 0.5997, "step": 8681 }, { "epoch": 0.56, "grad_norm": 1.7364314244139398, "learning_rate": 4.342659098907833e-06, "loss": 0.7351, "step": 8682 }, { "epoch": 0.56, "grad_norm": 1.3848691464865195, "learning_rate": 4.341631554357347e-06, "loss": 0.5908, "step": 8683 }, { "epoch": 0.56, "grad_norm": 1.5300061390274697, "learning_rate": 4.340604038102154e-06, "loss": 0.707, "step": 8684 }, { "epoch": 0.56, "grad_norm": 1.448134556855019, "learning_rate": 4.339576550186413e-06, "loss": 0.5855, "step": 8685 }, { "epoch": 0.56, "grad_norm": 1.531199033722298, "learning_rate": 4.338549090654284e-06, "loss": 0.6122, "step": 8686 }, { "epoch": 0.56, "grad_norm": 1.3786494494080115, "learning_rate": 4.3375216595499254e-06, "loss": 0.6486, "step": 8687 }, { "epoch": 0.56, "grad_norm": 1.4484227733634871, "learning_rate": 4.336494256917494e-06, "loss": 0.652, "step": 8688 }, { "epoch": 0.56, "grad_norm": 1.6150704269604412, "learning_rate": 4.335466882801146e-06, "loss": 0.6007, "step": 8689 }, { "epoch": 0.56, "grad_norm": 1.5036397238722743, "learning_rate": 4.334439537245033e-06, "loss": 0.6276, "step": 8690 }, { "epoch": 0.56, "grad_norm": 1.6769019076037042, "learning_rate": 4.333412220293313e-06, "loss": 0.6517, "step": 8691 }, { "epoch": 0.56, "grad_norm": 1.3627651964474992, "learning_rate": 4.332384931990133e-06, "loss": 0.7282, "step": 8692 }, { "epoch": 0.56, "grad_norm": 1.30720945009527, "learning_rate": 4.3313576723796464e-06, "loss": 0.5454, "step": 8693 }, { "epoch": 0.56, "grad_norm": 1.6311512064897111, "learning_rate": 4.330330441506001e-06, "loss": 0.6822, "step": 8694 }, { "epoch": 0.56, "grad_norm": 1.5880373939638452, "learning_rate": 4.329303239413346e-06, "loss": 0.6835, "step": 8695 }, { "epoch": 0.56, "grad_norm": 1.6379645287369824, "learning_rate": 4.328276066145831e-06, "loss": 0.628, "step": 8696 }, { "epoch": 0.56, "grad_norm": 1.6329672295226734, "learning_rate": 4.327248921747597e-06, "loss": 0.8128, "step": 8697 }, { "epoch": 0.56, "grad_norm": 1.502419525839539, "learning_rate": 4.326221806262793e-06, "loss": 0.6658, "step": 8698 }, { "epoch": 0.56, "grad_norm": 1.417746671316944, "learning_rate": 4.325194719735557e-06, "loss": 0.7568, "step": 8699 }, { "epoch": 0.56, "grad_norm": 1.3831402250493217, "learning_rate": 4.324167662210035e-06, "loss": 0.7476, "step": 8700 }, { "epoch": 0.56, "grad_norm": 1.2305877296147179, "learning_rate": 4.3231406337303665e-06, "loss": 0.6536, "step": 8701 }, { "epoch": 0.56, "grad_norm": 1.4238787768794003, "learning_rate": 4.322113634340693e-06, "loss": 0.6303, "step": 8702 }, { "epoch": 0.56, "grad_norm": 1.6227502693827665, "learning_rate": 4.321086664085149e-06, "loss": 0.7011, "step": 8703 }, { "epoch": 0.56, "grad_norm": 1.175704012061953, "learning_rate": 4.320059723007874e-06, "loss": 0.7198, "step": 8704 }, { "epoch": 0.56, "grad_norm": 1.3547396503925104, "learning_rate": 4.319032811153005e-06, "loss": 0.5879, "step": 8705 }, { "epoch": 0.56, "grad_norm": 2.8078239499229047, "learning_rate": 4.318005928564672e-06, "loss": 0.6593, "step": 8706 }, { "epoch": 0.56, "grad_norm": 1.7381351158420584, "learning_rate": 4.316979075287014e-06, "loss": 0.6297, "step": 8707 }, { "epoch": 0.56, "grad_norm": 1.4753055434054054, "learning_rate": 4.315952251364158e-06, "loss": 0.6794, "step": 8708 }, { "epoch": 0.56, "grad_norm": 1.1124706184782387, "learning_rate": 4.314925456840239e-06, "loss": 0.7986, "step": 8709 }, { "epoch": 0.56, "grad_norm": 1.820948303196116, "learning_rate": 4.313898691759382e-06, "loss": 0.69, "step": 8710 }, { "epoch": 0.56, "grad_norm": 1.543880805451711, "learning_rate": 4.3128719561657205e-06, "loss": 0.7307, "step": 8711 }, { "epoch": 0.56, "grad_norm": 1.380942996831645, "learning_rate": 4.311845250103376e-06, "loss": 0.619, "step": 8712 }, { "epoch": 0.56, "grad_norm": 1.6154842184043012, "learning_rate": 4.310818573616476e-06, "loss": 0.6738, "step": 8713 }, { "epoch": 0.56, "grad_norm": 1.5814524117628244, "learning_rate": 4.309791926749147e-06, "loss": 0.6059, "step": 8714 }, { "epoch": 0.56, "grad_norm": 1.0571898015106551, "learning_rate": 4.30876530954551e-06, "loss": 0.6392, "step": 8715 }, { "epoch": 0.56, "grad_norm": 1.4525898990168118, "learning_rate": 4.3077387220496886e-06, "loss": 0.6434, "step": 8716 }, { "epoch": 0.56, "grad_norm": 1.4657292614167683, "learning_rate": 4.3067121643058e-06, "loss": 0.6409, "step": 8717 }, { "epoch": 0.56, "grad_norm": 1.0972565798915888, "learning_rate": 4.30568563635797e-06, "loss": 0.6522, "step": 8718 }, { "epoch": 0.56, "grad_norm": 1.4518712112311083, "learning_rate": 4.304659138250309e-06, "loss": 0.6085, "step": 8719 }, { "epoch": 0.56, "grad_norm": 1.5816058456314042, "learning_rate": 4.303632670026937e-06, "loss": 0.7561, "step": 8720 }, { "epoch": 0.56, "grad_norm": 1.5464705132148848, "learning_rate": 4.302606231731971e-06, "loss": 0.626, "step": 8721 }, { "epoch": 0.56, "grad_norm": 1.6488207510391848, "learning_rate": 4.301579823409523e-06, "loss": 0.7118, "step": 8722 }, { "epoch": 0.56, "grad_norm": 1.5157156464443482, "learning_rate": 4.300553445103707e-06, "loss": 0.7059, "step": 8723 }, { "epoch": 0.56, "grad_norm": 1.5846114817093098, "learning_rate": 4.299527096858633e-06, "loss": 0.6696, "step": 8724 }, { "epoch": 0.56, "grad_norm": 1.7252206981093754, "learning_rate": 4.298500778718415e-06, "loss": 0.5835, "step": 8725 }, { "epoch": 0.56, "grad_norm": 1.4158393320042397, "learning_rate": 4.297474490727157e-06, "loss": 0.7054, "step": 8726 }, { "epoch": 0.56, "grad_norm": 1.505217934661444, "learning_rate": 4.296448232928971e-06, "loss": 0.7133, "step": 8727 }, { "epoch": 0.56, "grad_norm": 1.6061127927210412, "learning_rate": 4.295422005367961e-06, "loss": 0.7165, "step": 8728 }, { "epoch": 0.56, "grad_norm": 1.2654199323183348, "learning_rate": 4.294395808088232e-06, "loss": 0.589, "step": 8729 }, { "epoch": 0.56, "grad_norm": 1.9566807881783566, "learning_rate": 4.2933696411338885e-06, "loss": 0.5705, "step": 8730 }, { "epoch": 0.56, "grad_norm": 1.6907918845450938, "learning_rate": 4.292343504549032e-06, "loss": 0.7301, "step": 8731 }, { "epoch": 0.56, "grad_norm": 1.6817678837261212, "learning_rate": 4.291317398377768e-06, "loss": 0.6609, "step": 8732 }, { "epoch": 0.56, "grad_norm": 1.6879656957530351, "learning_rate": 4.29029132266419e-06, "loss": 0.6045, "step": 8733 }, { "epoch": 0.56, "grad_norm": 1.4419312722389932, "learning_rate": 4.289265277452403e-06, "loss": 0.6802, "step": 8734 }, { "epoch": 0.56, "grad_norm": 1.5549193774388719, "learning_rate": 4.288239262786497e-06, "loss": 0.6615, "step": 8735 }, { "epoch": 0.56, "grad_norm": 1.496452385141477, "learning_rate": 4.287213278710574e-06, "loss": 0.7137, "step": 8736 }, { "epoch": 0.56, "grad_norm": 1.6957921612131495, "learning_rate": 4.286187325268726e-06, "loss": 0.6377, "step": 8737 }, { "epoch": 0.56, "grad_norm": 1.673487209265123, "learning_rate": 4.285161402505047e-06, "loss": 0.745, "step": 8738 }, { "epoch": 0.56, "grad_norm": 1.6202205888707268, "learning_rate": 4.284135510463628e-06, "loss": 0.7704, "step": 8739 }, { "epoch": 0.56, "grad_norm": 1.436446136616692, "learning_rate": 4.283109649188561e-06, "loss": 0.6842, "step": 8740 }, { "epoch": 0.56, "grad_norm": 1.0780726226927468, "learning_rate": 4.282083818723937e-06, "loss": 0.6203, "step": 8741 }, { "epoch": 0.56, "grad_norm": 1.7800786071869386, "learning_rate": 4.2810580191138385e-06, "loss": 0.5972, "step": 8742 }, { "epoch": 0.56, "grad_norm": 1.542939155476388, "learning_rate": 4.280032250402358e-06, "loss": 0.6471, "step": 8743 }, { "epoch": 0.56, "grad_norm": 1.5837989016786753, "learning_rate": 4.279006512633576e-06, "loss": 0.7483, "step": 8744 }, { "epoch": 0.56, "grad_norm": 1.6039233461528, "learning_rate": 4.2779808058515825e-06, "loss": 0.6437, "step": 8745 }, { "epoch": 0.56, "grad_norm": 1.6585181466199361, "learning_rate": 4.276955130100455e-06, "loss": 0.6788, "step": 8746 }, { "epoch": 0.56, "grad_norm": 1.6864411403021413, "learning_rate": 4.275929485424278e-06, "loss": 0.7099, "step": 8747 }, { "epoch": 0.56, "grad_norm": 1.5732843823854363, "learning_rate": 4.274903871867128e-06, "loss": 0.6886, "step": 8748 }, { "epoch": 0.56, "grad_norm": 1.4358747852829268, "learning_rate": 4.2738782894730876e-06, "loss": 0.6408, "step": 8749 }, { "epoch": 0.56, "grad_norm": 1.519631503965742, "learning_rate": 4.272852738286231e-06, "loss": 0.6958, "step": 8750 }, { "epoch": 0.56, "grad_norm": 1.4618123660728037, "learning_rate": 4.271827218350636e-06, "loss": 0.6782, "step": 8751 }, { "epoch": 0.56, "grad_norm": 1.6052778542108799, "learning_rate": 4.270801729710379e-06, "loss": 0.6781, "step": 8752 }, { "epoch": 0.56, "grad_norm": 1.7123596213185468, "learning_rate": 4.269776272409529e-06, "loss": 0.65, "step": 8753 }, { "epoch": 0.56, "grad_norm": 1.8826051096762502, "learning_rate": 4.268750846492163e-06, "loss": 0.7378, "step": 8754 }, { "epoch": 0.56, "grad_norm": 1.9841597268435862, "learning_rate": 4.2677254520023465e-06, "loss": 0.7546, "step": 8755 }, { "epoch": 0.56, "grad_norm": 1.4906689354240195, "learning_rate": 4.266700088984153e-06, "loss": 0.6686, "step": 8756 }, { "epoch": 0.56, "grad_norm": 1.4434068767477775, "learning_rate": 4.265674757481647e-06, "loss": 0.6789, "step": 8757 }, { "epoch": 0.56, "grad_norm": 1.3687480184639778, "learning_rate": 4.2646494575389e-06, "loss": 0.6475, "step": 8758 }, { "epoch": 0.56, "grad_norm": 1.4824365823750167, "learning_rate": 4.263624189199971e-06, "loss": 0.745, "step": 8759 }, { "epoch": 0.56, "grad_norm": 1.5676397226633991, "learning_rate": 4.262598952508927e-06, "loss": 0.7016, "step": 8760 }, { "epoch": 0.56, "grad_norm": 1.6332907879724434, "learning_rate": 4.261573747509833e-06, "loss": 0.6128, "step": 8761 }, { "epoch": 0.56, "grad_norm": 1.509655086038048, "learning_rate": 4.260548574246746e-06, "loss": 0.6818, "step": 8762 }, { "epoch": 0.56, "grad_norm": 1.6119987717044848, "learning_rate": 4.259523432763728e-06, "loss": 0.648, "step": 8763 }, { "epoch": 0.56, "grad_norm": 1.1911687636426849, "learning_rate": 4.258498323104837e-06, "loss": 0.6186, "step": 8764 }, { "epoch": 0.56, "grad_norm": 1.4270923425194972, "learning_rate": 4.25747324531413e-06, "loss": 0.675, "step": 8765 }, { "epoch": 0.56, "grad_norm": 1.5476366809889912, "learning_rate": 4.256448199435662e-06, "loss": 0.6711, "step": 8766 }, { "epoch": 0.56, "grad_norm": 1.4838248201531135, "learning_rate": 4.25542318551349e-06, "loss": 0.6666, "step": 8767 }, { "epoch": 0.56, "grad_norm": 1.3406525136445844, "learning_rate": 4.2543982035916625e-06, "loss": 0.66, "step": 8768 }, { "epoch": 0.56, "grad_norm": 1.3458963288051675, "learning_rate": 4.2533732537142335e-06, "loss": 0.5876, "step": 8769 }, { "epoch": 0.56, "grad_norm": 1.0982943403117926, "learning_rate": 4.252348335925255e-06, "loss": 0.626, "step": 8770 }, { "epoch": 0.56, "grad_norm": 1.489456365293813, "learning_rate": 4.2513234502687725e-06, "loss": 0.6788, "step": 8771 }, { "epoch": 0.56, "grad_norm": 1.5919892687928434, "learning_rate": 4.250298596788835e-06, "loss": 0.6784, "step": 8772 }, { "epoch": 0.56, "grad_norm": 1.3367071145805423, "learning_rate": 4.249273775529489e-06, "loss": 0.6773, "step": 8773 }, { "epoch": 0.56, "grad_norm": 1.5156091681829655, "learning_rate": 4.24824898653478e-06, "loss": 0.5694, "step": 8774 }, { "epoch": 0.56, "grad_norm": 1.3617085218714577, "learning_rate": 4.247224229848747e-06, "loss": 0.568, "step": 8775 }, { "epoch": 0.56, "grad_norm": 1.5364203387240778, "learning_rate": 4.246199505515438e-06, "loss": 0.6561, "step": 8776 }, { "epoch": 0.56, "grad_norm": 1.5098654864918248, "learning_rate": 4.245174813578887e-06, "loss": 0.6387, "step": 8777 }, { "epoch": 0.56, "grad_norm": 1.5690673456766466, "learning_rate": 4.244150154083137e-06, "loss": 0.7378, "step": 8778 }, { "epoch": 0.56, "grad_norm": 1.4570071385299945, "learning_rate": 4.243125527072227e-06, "loss": 0.6171, "step": 8779 }, { "epoch": 0.56, "grad_norm": 1.6604121835955525, "learning_rate": 4.24210093259019e-06, "loss": 0.6391, "step": 8780 }, { "epoch": 0.56, "grad_norm": 1.8556894070886032, "learning_rate": 4.241076370681064e-06, "loss": 0.7948, "step": 8781 }, { "epoch": 0.56, "grad_norm": 1.4809180858587139, "learning_rate": 4.240051841388878e-06, "loss": 0.6809, "step": 8782 }, { "epoch": 0.56, "grad_norm": 1.5048313812169654, "learning_rate": 4.239027344757671e-06, "loss": 0.6993, "step": 8783 }, { "epoch": 0.56, "grad_norm": 1.496524564703643, "learning_rate": 4.238002880831466e-06, "loss": 0.6869, "step": 8784 }, { "epoch": 0.56, "grad_norm": 1.377152897128203, "learning_rate": 4.2369784496542986e-06, "loss": 0.6207, "step": 8785 }, { "epoch": 0.56, "grad_norm": 1.539121552513063, "learning_rate": 4.235954051270192e-06, "loss": 0.652, "step": 8786 }, { "epoch": 0.56, "grad_norm": 1.5362343655718227, "learning_rate": 4.234929685723175e-06, "loss": 0.6417, "step": 8787 }, { "epoch": 0.56, "grad_norm": 1.7965401733734565, "learning_rate": 4.2339053530572735e-06, "loss": 0.7225, "step": 8788 }, { "epoch": 0.56, "grad_norm": 0.971094003894807, "learning_rate": 4.2328810533165095e-06, "loss": 0.586, "step": 8789 }, { "epoch": 0.56, "grad_norm": 1.8546385697037802, "learning_rate": 4.231856786544907e-06, "loss": 0.6522, "step": 8790 }, { "epoch": 0.56, "grad_norm": 1.7011291838052716, "learning_rate": 4.230832552786485e-06, "loss": 0.6897, "step": 8791 }, { "epoch": 0.56, "grad_norm": 1.4471325975455063, "learning_rate": 4.2298083520852636e-06, "loss": 0.6713, "step": 8792 }, { "epoch": 0.56, "grad_norm": 1.6713911111793047, "learning_rate": 4.2287841844852595e-06, "loss": 0.6348, "step": 8793 }, { "epoch": 0.56, "grad_norm": 1.5685711041304724, "learning_rate": 4.227760050030494e-06, "loss": 0.6412, "step": 8794 }, { "epoch": 0.56, "grad_norm": 1.406940132851908, "learning_rate": 4.226735948764976e-06, "loss": 0.6328, "step": 8795 }, { "epoch": 0.56, "grad_norm": 1.867821337481878, "learning_rate": 4.2257118807327216e-06, "loss": 0.6405, "step": 8796 }, { "epoch": 0.56, "grad_norm": 1.5959212876412021, "learning_rate": 4.2246878459777465e-06, "loss": 0.7624, "step": 8797 }, { "epoch": 0.56, "grad_norm": 2.180775742591053, "learning_rate": 4.223663844544056e-06, "loss": 0.6951, "step": 8798 }, { "epoch": 0.56, "grad_norm": 1.5544984058660993, "learning_rate": 4.222639876475663e-06, "loss": 0.5779, "step": 8799 }, { "epoch": 0.56, "grad_norm": 1.5744070840332072, "learning_rate": 4.221615941816575e-06, "loss": 0.6717, "step": 8800 }, { "epoch": 0.56, "grad_norm": 1.4817380865154124, "learning_rate": 4.220592040610798e-06, "loss": 0.6621, "step": 8801 }, { "epoch": 0.56, "grad_norm": 1.2351361958564933, "learning_rate": 4.219568172902336e-06, "loss": 0.6261, "step": 8802 }, { "epoch": 0.56, "grad_norm": 1.0925813062106366, "learning_rate": 4.218544338735197e-06, "loss": 0.6194, "step": 8803 }, { "epoch": 0.56, "grad_norm": 1.501781950889685, "learning_rate": 4.217520538153378e-06, "loss": 0.6287, "step": 8804 }, { "epoch": 0.56, "grad_norm": 1.8190513292480903, "learning_rate": 4.216496771200881e-06, "loss": 0.6993, "step": 8805 }, { "epoch": 0.56, "grad_norm": 1.4956147448193318, "learning_rate": 4.21547303792171e-06, "loss": 0.6622, "step": 8806 }, { "epoch": 0.56, "grad_norm": 1.4270893305614905, "learning_rate": 4.214449338359856e-06, "loss": 0.6906, "step": 8807 }, { "epoch": 0.56, "grad_norm": 1.3095344150353003, "learning_rate": 4.2134256725593206e-06, "loss": 0.6032, "step": 8808 }, { "epoch": 0.56, "grad_norm": 1.5053139535804279, "learning_rate": 4.2124020405640955e-06, "loss": 0.6452, "step": 8809 }, { "epoch": 0.56, "grad_norm": 1.4741350773970683, "learning_rate": 4.211378442418178e-06, "loss": 0.6331, "step": 8810 }, { "epoch": 0.56, "grad_norm": 1.5856411172931033, "learning_rate": 4.2103548781655555e-06, "loss": 0.6556, "step": 8811 }, { "epoch": 0.56, "grad_norm": 1.5535870561799925, "learning_rate": 4.209331347850224e-06, "loss": 0.6392, "step": 8812 }, { "epoch": 0.56, "grad_norm": 1.5285721021867538, "learning_rate": 4.2083078515161664e-06, "loss": 0.6405, "step": 8813 }, { "epoch": 0.56, "grad_norm": 1.1039664355948768, "learning_rate": 4.207284389207375e-06, "loss": 0.5602, "step": 8814 }, { "epoch": 0.56, "grad_norm": 1.481222733133298, "learning_rate": 4.206260960967836e-06, "loss": 0.6563, "step": 8815 }, { "epoch": 0.56, "grad_norm": 6.150506559503733, "learning_rate": 4.205237566841531e-06, "loss": 0.7683, "step": 8816 }, { "epoch": 0.56, "grad_norm": 1.466713628537286, "learning_rate": 4.204214206872448e-06, "loss": 0.658, "step": 8817 }, { "epoch": 0.56, "grad_norm": 1.4177099730965652, "learning_rate": 4.203190881104564e-06, "loss": 0.6331, "step": 8818 }, { "epoch": 0.56, "grad_norm": 1.5291018155797589, "learning_rate": 4.202167589581863e-06, "loss": 0.684, "step": 8819 }, { "epoch": 0.56, "grad_norm": 1.6281958482014298, "learning_rate": 4.201144332348321e-06, "loss": 0.6963, "step": 8820 }, { "epoch": 0.56, "grad_norm": 1.544518131998742, "learning_rate": 4.200121109447919e-06, "loss": 0.7425, "step": 8821 }, { "epoch": 0.56, "grad_norm": 1.4598972440446338, "learning_rate": 4.199097920924628e-06, "loss": 0.672, "step": 8822 }, { "epoch": 0.56, "grad_norm": 1.4175725702932032, "learning_rate": 4.198074766822429e-06, "loss": 0.7279, "step": 8823 }, { "epoch": 0.56, "grad_norm": 2.6056507929050583, "learning_rate": 4.197051647185288e-06, "loss": 0.6778, "step": 8824 }, { "epoch": 0.56, "grad_norm": 1.0678891308884357, "learning_rate": 4.196028562057181e-06, "loss": 0.6903, "step": 8825 }, { "epoch": 0.56, "grad_norm": 1.543457707957749, "learning_rate": 4.1950055114820785e-06, "loss": 0.5877, "step": 8826 }, { "epoch": 0.56, "grad_norm": 1.3725891426593606, "learning_rate": 4.193982495503946e-06, "loss": 0.6657, "step": 8827 }, { "epoch": 0.57, "grad_norm": 1.5051779176044169, "learning_rate": 4.1929595141667535e-06, "loss": 0.7325, "step": 8828 }, { "epoch": 0.57, "grad_norm": 1.6645302668109914, "learning_rate": 4.191936567514464e-06, "loss": 0.6492, "step": 8829 }, { "epoch": 0.57, "grad_norm": 1.2479727769153404, "learning_rate": 4.190913655591044e-06, "loss": 0.6462, "step": 8830 }, { "epoch": 0.57, "grad_norm": 1.5561275142905353, "learning_rate": 4.189890778440454e-06, "loss": 0.6969, "step": 8831 }, { "epoch": 0.57, "grad_norm": 1.7477609329189348, "learning_rate": 4.188867936106658e-06, "loss": 0.7631, "step": 8832 }, { "epoch": 0.57, "grad_norm": 1.771949552516809, "learning_rate": 4.187845128633611e-06, "loss": 0.6647, "step": 8833 }, { "epoch": 0.57, "grad_norm": 1.5696464426952652, "learning_rate": 4.1868223560652746e-06, "loss": 0.7053, "step": 8834 }, { "epoch": 0.57, "grad_norm": 2.071584275436015, "learning_rate": 4.185799618445605e-06, "loss": 0.7236, "step": 8835 }, { "epoch": 0.57, "grad_norm": 1.7487168261209864, "learning_rate": 4.184776915818557e-06, "loss": 0.6881, "step": 8836 }, { "epoch": 0.57, "grad_norm": 1.4656498442175918, "learning_rate": 4.1837542482280845e-06, "loss": 0.6926, "step": 8837 }, { "epoch": 0.57, "grad_norm": 1.450979935457109, "learning_rate": 4.182731615718138e-06, "loss": 0.6935, "step": 8838 }, { "epoch": 0.57, "grad_norm": 1.2403202102678885, "learning_rate": 4.181709018332672e-06, "loss": 0.6618, "step": 8839 }, { "epoch": 0.57, "grad_norm": 1.5114139521346903, "learning_rate": 4.18068645611563e-06, "loss": 0.6659, "step": 8840 }, { "epoch": 0.57, "grad_norm": 1.7109811356909603, "learning_rate": 4.179663929110964e-06, "loss": 0.7427, "step": 8841 }, { "epoch": 0.57, "grad_norm": 1.6301152340720924, "learning_rate": 4.178641437362618e-06, "loss": 0.6745, "step": 8842 }, { "epoch": 0.57, "grad_norm": 1.9624600532460355, "learning_rate": 4.177618980914536e-06, "loss": 0.6465, "step": 8843 }, { "epoch": 0.57, "grad_norm": 1.0165544383244638, "learning_rate": 4.176596559810664e-06, "loss": 0.6657, "step": 8844 }, { "epoch": 0.57, "grad_norm": 1.9345036708143566, "learning_rate": 4.1755741740949405e-06, "loss": 0.596, "step": 8845 }, { "epoch": 0.57, "grad_norm": 1.4741868341399726, "learning_rate": 4.174551823811308e-06, "loss": 0.7247, "step": 8846 }, { "epoch": 0.57, "grad_norm": 1.838589989050974, "learning_rate": 4.173529509003702e-06, "loss": 0.6334, "step": 8847 }, { "epoch": 0.57, "grad_norm": 2.1924033123236524, "learning_rate": 4.172507229716063e-06, "loss": 0.7085, "step": 8848 }, { "epoch": 0.57, "grad_norm": 1.628709818046417, "learning_rate": 4.171484985992323e-06, "loss": 0.624, "step": 8849 }, { "epoch": 0.57, "grad_norm": 1.4799055648786057, "learning_rate": 4.1704627778764175e-06, "loss": 0.6176, "step": 8850 }, { "epoch": 0.57, "grad_norm": 1.7500880661063036, "learning_rate": 4.169440605412278e-06, "loss": 0.7006, "step": 8851 }, { "epoch": 0.57, "grad_norm": 1.4563311567713073, "learning_rate": 4.168418468643836e-06, "loss": 0.5997, "step": 8852 }, { "epoch": 0.57, "grad_norm": 1.5845601933568192, "learning_rate": 4.167396367615023e-06, "loss": 0.6854, "step": 8853 }, { "epoch": 0.57, "grad_norm": 1.4928600266706367, "learning_rate": 4.166374302369763e-06, "loss": 0.6745, "step": 8854 }, { "epoch": 0.57, "grad_norm": 1.4139207624110928, "learning_rate": 4.165352272951985e-06, "loss": 0.6292, "step": 8855 }, { "epoch": 0.57, "grad_norm": 1.377566467151159, "learning_rate": 4.1643302794056105e-06, "loss": 0.6676, "step": 8856 }, { "epoch": 0.57, "grad_norm": 1.3419732380570601, "learning_rate": 4.163308321774566e-06, "loss": 0.591, "step": 8857 }, { "epoch": 0.57, "grad_norm": 1.690716167505307, "learning_rate": 4.162286400102771e-06, "loss": 0.6058, "step": 8858 }, { "epoch": 0.57, "grad_norm": 1.4869442863582807, "learning_rate": 4.161264514434148e-06, "loss": 0.664, "step": 8859 }, { "epoch": 0.57, "grad_norm": 1.5408818272677325, "learning_rate": 4.160242664812611e-06, "loss": 0.6337, "step": 8860 }, { "epoch": 0.57, "grad_norm": 1.5511445883532267, "learning_rate": 4.15922085128208e-06, "loss": 0.6678, "step": 8861 }, { "epoch": 0.57, "grad_norm": 1.4714517051854659, "learning_rate": 4.158199073886473e-06, "loss": 0.6218, "step": 8862 }, { "epoch": 0.57, "grad_norm": 1.7921346794306325, "learning_rate": 4.157177332669698e-06, "loss": 0.6514, "step": 8863 }, { "epoch": 0.57, "grad_norm": 1.526260751099195, "learning_rate": 4.1561556276756725e-06, "loss": 0.7905, "step": 8864 }, { "epoch": 0.57, "grad_norm": 1.6309127216804145, "learning_rate": 4.155133958948302e-06, "loss": 0.7004, "step": 8865 }, { "epoch": 0.57, "grad_norm": 1.5240627182365978, "learning_rate": 4.154112326531502e-06, "loss": 0.6706, "step": 8866 }, { "epoch": 0.57, "grad_norm": 1.7110248434120097, "learning_rate": 4.153090730469174e-06, "loss": 0.7913, "step": 8867 }, { "epoch": 0.57, "grad_norm": 1.1551391012557328, "learning_rate": 4.1520691708052295e-06, "loss": 0.6263, "step": 8868 }, { "epoch": 0.57, "grad_norm": 1.4662474317757377, "learning_rate": 4.151047647583568e-06, "loss": 0.753, "step": 8869 }, { "epoch": 0.57, "grad_norm": 1.7328141725144461, "learning_rate": 4.150026160848094e-06, "loss": 0.689, "step": 8870 }, { "epoch": 0.57, "grad_norm": 1.1089900748045425, "learning_rate": 4.149004710642712e-06, "loss": 0.6415, "step": 8871 }, { "epoch": 0.57, "grad_norm": 1.5795888000818394, "learning_rate": 4.147983297011318e-06, "loss": 0.6595, "step": 8872 }, { "epoch": 0.57, "grad_norm": 1.1297941877394937, "learning_rate": 4.146961919997813e-06, "loss": 0.6873, "step": 8873 }, { "epoch": 0.57, "grad_norm": 3.3833362642372733, "learning_rate": 4.14594057964609e-06, "loss": 0.6638, "step": 8874 }, { "epoch": 0.57, "grad_norm": 1.6285766377118729, "learning_rate": 4.144919276000048e-06, "loss": 0.7423, "step": 8875 }, { "epoch": 0.57, "grad_norm": 1.3704593603857074, "learning_rate": 4.143898009103578e-06, "loss": 0.6775, "step": 8876 }, { "epoch": 0.57, "grad_norm": 1.5457801492140495, "learning_rate": 4.142876779000573e-06, "loss": 0.6791, "step": 8877 }, { "epoch": 0.57, "grad_norm": 1.698127568508124, "learning_rate": 4.141855585734923e-06, "loss": 0.63, "step": 8878 }, { "epoch": 0.57, "grad_norm": 1.5335896557844488, "learning_rate": 4.1408344293505154e-06, "loss": 0.7263, "step": 8879 }, { "epoch": 0.57, "grad_norm": 2.0374470781186083, "learning_rate": 4.13981330989124e-06, "loss": 0.6598, "step": 8880 }, { "epoch": 0.57, "grad_norm": 1.4799113768142367, "learning_rate": 4.13879222740098e-06, "loss": 0.604, "step": 8881 }, { "epoch": 0.57, "grad_norm": 1.1351394381811046, "learning_rate": 4.1377711819236225e-06, "loss": 0.7109, "step": 8882 }, { "epoch": 0.57, "grad_norm": 1.3955455519682167, "learning_rate": 4.136750173503046e-06, "loss": 0.7009, "step": 8883 }, { "epoch": 0.57, "grad_norm": 1.6364290156022812, "learning_rate": 4.135729202183134e-06, "loss": 0.6035, "step": 8884 }, { "epoch": 0.57, "grad_norm": 1.3944408685960246, "learning_rate": 4.134708268007764e-06, "loss": 0.6638, "step": 8885 }, { "epoch": 0.57, "grad_norm": 1.4755140441829375, "learning_rate": 4.133687371020815e-06, "loss": 0.6118, "step": 8886 }, { "epoch": 0.57, "grad_norm": 1.5400656363648548, "learning_rate": 4.132666511266162e-06, "loss": 0.6549, "step": 8887 }, { "epoch": 0.57, "grad_norm": 1.49709230192146, "learning_rate": 4.131645688787679e-06, "loss": 0.6909, "step": 8888 }, { "epoch": 0.57, "grad_norm": 1.1201387104234675, "learning_rate": 4.130624903629242e-06, "loss": 0.6171, "step": 8889 }, { "epoch": 0.57, "grad_norm": 1.6795727349755603, "learning_rate": 4.129604155834718e-06, "loss": 0.7813, "step": 8890 }, { "epoch": 0.57, "grad_norm": 1.9652761945262522, "learning_rate": 4.12858344544798e-06, "loss": 0.6697, "step": 8891 }, { "epoch": 0.57, "grad_norm": 1.5318815592968473, "learning_rate": 4.127562772512893e-06, "loss": 0.7741, "step": 8892 }, { "epoch": 0.57, "grad_norm": 1.7723785860652195, "learning_rate": 4.126542137073325e-06, "loss": 0.6743, "step": 8893 }, { "epoch": 0.57, "grad_norm": 1.6049506788613508, "learning_rate": 4.12552153917314e-06, "loss": 0.6594, "step": 8894 }, { "epoch": 0.57, "grad_norm": 1.6372789924078697, "learning_rate": 4.124500978856204e-06, "loss": 0.7374, "step": 8895 }, { "epoch": 0.57, "grad_norm": 1.6461930225315615, "learning_rate": 4.123480456166374e-06, "loss": 0.6659, "step": 8896 }, { "epoch": 0.57, "grad_norm": 1.3172347742098862, "learning_rate": 4.122459971147514e-06, "loss": 0.6187, "step": 8897 }, { "epoch": 0.57, "grad_norm": 1.5143131545189608, "learning_rate": 4.121439523843478e-06, "loss": 0.742, "step": 8898 }, { "epoch": 0.57, "grad_norm": 1.6450056450456227, "learning_rate": 4.120419114298127e-06, "loss": 0.7162, "step": 8899 }, { "epoch": 0.57, "grad_norm": 1.856656607976222, "learning_rate": 4.119398742555314e-06, "loss": 0.6834, "step": 8900 }, { "epoch": 0.57, "grad_norm": 1.5914183410878802, "learning_rate": 4.118378408658891e-06, "loss": 0.6456, "step": 8901 }, { "epoch": 0.57, "grad_norm": 1.5690440624352175, "learning_rate": 4.117358112652714e-06, "loss": 0.662, "step": 8902 }, { "epoch": 0.57, "grad_norm": 1.390266238924334, "learning_rate": 4.1163378545806286e-06, "loss": 0.636, "step": 8903 }, { "epoch": 0.57, "grad_norm": 1.5320773339125, "learning_rate": 4.115317634486488e-06, "loss": 0.6059, "step": 8904 }, { "epoch": 0.57, "grad_norm": 1.4124884415368504, "learning_rate": 4.114297452414133e-06, "loss": 0.618, "step": 8905 }, { "epoch": 0.57, "grad_norm": 1.5706649003658286, "learning_rate": 4.113277308407415e-06, "loss": 0.6747, "step": 8906 }, { "epoch": 0.57, "grad_norm": 1.1843945818314385, "learning_rate": 4.112257202510173e-06, "loss": 0.7134, "step": 8907 }, { "epoch": 0.57, "grad_norm": 1.5218174802770887, "learning_rate": 4.111237134766251e-06, "loss": 0.681, "step": 8908 }, { "epoch": 0.57, "grad_norm": 1.4894482868236516, "learning_rate": 4.110217105219492e-06, "loss": 0.6021, "step": 8909 }, { "epoch": 0.57, "grad_norm": 1.669096820980801, "learning_rate": 4.10919711391373e-06, "loss": 0.6975, "step": 8910 }, { "epoch": 0.57, "grad_norm": 1.6207481065412865, "learning_rate": 4.108177160892807e-06, "loss": 0.7003, "step": 8911 }, { "epoch": 0.57, "grad_norm": 1.9723133937498625, "learning_rate": 4.107157246200552e-06, "loss": 0.7581, "step": 8912 }, { "epoch": 0.57, "grad_norm": 1.3454544379315296, "learning_rate": 4.106137369880804e-06, "loss": 0.6633, "step": 8913 }, { "epoch": 0.57, "grad_norm": 1.5619308644772, "learning_rate": 4.105117531977393e-06, "loss": 0.7408, "step": 8914 }, { "epoch": 0.57, "grad_norm": 1.1725425517884225, "learning_rate": 4.104097732534153e-06, "loss": 0.7093, "step": 8915 }, { "epoch": 0.57, "grad_norm": 1.5111936896549902, "learning_rate": 4.103077971594906e-06, "loss": 0.5843, "step": 8916 }, { "epoch": 0.57, "grad_norm": 1.2406178721754495, "learning_rate": 4.102058249203483e-06, "loss": 0.6994, "step": 8917 }, { "epoch": 0.57, "grad_norm": 1.893195758964987, "learning_rate": 4.101038565403713e-06, "loss": 0.6442, "step": 8918 }, { "epoch": 0.57, "grad_norm": 1.5398527012092216, "learning_rate": 4.1000189202394144e-06, "loss": 0.6764, "step": 8919 }, { "epoch": 0.57, "grad_norm": 1.4839227007308444, "learning_rate": 4.098999313754413e-06, "loss": 0.7402, "step": 8920 }, { "epoch": 0.57, "grad_norm": 1.5838101624908156, "learning_rate": 4.097979745992526e-06, "loss": 0.6328, "step": 8921 }, { "epoch": 0.57, "grad_norm": 1.510451488757263, "learning_rate": 4.096960216997575e-06, "loss": 0.6666, "step": 8922 }, { "epoch": 0.57, "grad_norm": 1.6221731383936266, "learning_rate": 4.095940726813375e-06, "loss": 0.6675, "step": 8923 }, { "epoch": 0.57, "grad_norm": 1.845316947055617, "learning_rate": 4.094921275483745e-06, "loss": 0.7141, "step": 8924 }, { "epoch": 0.57, "grad_norm": 1.663662289233032, "learning_rate": 4.093901863052495e-06, "loss": 0.6665, "step": 8925 }, { "epoch": 0.57, "grad_norm": 1.5327496874499698, "learning_rate": 4.092882489563439e-06, "loss": 0.7212, "step": 8926 }, { "epoch": 0.57, "grad_norm": 1.8961762652223093, "learning_rate": 4.091863155060389e-06, "loss": 0.6867, "step": 8927 }, { "epoch": 0.57, "grad_norm": 1.688464758728093, "learning_rate": 4.090843859587151e-06, "loss": 0.7114, "step": 8928 }, { "epoch": 0.57, "grad_norm": 1.8461562262157036, "learning_rate": 4.0898246031875346e-06, "loss": 0.632, "step": 8929 }, { "epoch": 0.57, "grad_norm": 1.5358612344756648, "learning_rate": 4.088805385905342e-06, "loss": 0.5938, "step": 8930 }, { "epoch": 0.57, "grad_norm": 1.4901871070881199, "learning_rate": 4.087786207784383e-06, "loss": 0.6823, "step": 8931 }, { "epoch": 0.57, "grad_norm": 1.7224215258149183, "learning_rate": 4.086767068868453e-06, "loss": 0.7219, "step": 8932 }, { "epoch": 0.57, "grad_norm": 1.5183054308665782, "learning_rate": 4.085747969201357e-06, "loss": 0.7368, "step": 8933 }, { "epoch": 0.57, "grad_norm": 1.4938535524364729, "learning_rate": 4.084728908826891e-06, "loss": 0.6703, "step": 8934 }, { "epoch": 0.57, "grad_norm": 1.9602799881091968, "learning_rate": 4.083709887788852e-06, "loss": 0.6601, "step": 8935 }, { "epoch": 0.57, "grad_norm": 1.6506357610324054, "learning_rate": 4.082690906131039e-06, "loss": 0.7029, "step": 8936 }, { "epoch": 0.57, "grad_norm": 1.4989414230312619, "learning_rate": 4.081671963897241e-06, "loss": 0.7805, "step": 8937 }, { "epoch": 0.57, "grad_norm": 1.8994101919571884, "learning_rate": 4.080653061131256e-06, "loss": 0.6075, "step": 8938 }, { "epoch": 0.57, "grad_norm": 1.9618974587803293, "learning_rate": 4.079634197876867e-06, "loss": 0.7011, "step": 8939 }, { "epoch": 0.57, "grad_norm": 1.523884759244949, "learning_rate": 4.07861537417787e-06, "loss": 0.7315, "step": 8940 }, { "epoch": 0.57, "grad_norm": 1.5560625642467518, "learning_rate": 4.077596590078044e-06, "loss": 0.6241, "step": 8941 }, { "epoch": 0.57, "grad_norm": 1.6948750220275506, "learning_rate": 4.076577845621181e-06, "loss": 0.7183, "step": 8942 }, { "epoch": 0.57, "grad_norm": 1.6893785084522814, "learning_rate": 4.075559140851061e-06, "loss": 0.6589, "step": 8943 }, { "epoch": 0.57, "grad_norm": 1.3855452970719302, "learning_rate": 4.0745404758114644e-06, "loss": 0.6962, "step": 8944 }, { "epoch": 0.57, "grad_norm": 1.8390311581939043, "learning_rate": 4.073521850546177e-06, "loss": 0.6694, "step": 8945 }, { "epoch": 0.57, "grad_norm": 1.5530524061233946, "learning_rate": 4.072503265098972e-06, "loss": 0.6572, "step": 8946 }, { "epoch": 0.57, "grad_norm": 1.7626172709407915, "learning_rate": 4.07148471951363e-06, "loss": 0.6417, "step": 8947 }, { "epoch": 0.57, "grad_norm": 1.6781133836135427, "learning_rate": 4.0704662138339204e-06, "loss": 0.6367, "step": 8948 }, { "epoch": 0.57, "grad_norm": 1.5871519050201108, "learning_rate": 4.069447748103621e-06, "loss": 0.6975, "step": 8949 }, { "epoch": 0.57, "grad_norm": 1.7136905535750049, "learning_rate": 4.068429322366502e-06, "loss": 0.7077, "step": 8950 }, { "epoch": 0.57, "grad_norm": 1.2967853399547613, "learning_rate": 4.067410936666335e-06, "loss": 0.6161, "step": 8951 }, { "epoch": 0.57, "grad_norm": 1.919996245218696, "learning_rate": 4.066392591046883e-06, "loss": 0.6988, "step": 8952 }, { "epoch": 0.57, "grad_norm": 1.323972834377914, "learning_rate": 4.065374285551917e-06, "loss": 0.5606, "step": 8953 }, { "epoch": 0.57, "grad_norm": 1.5492070451745805, "learning_rate": 4.064356020225202e-06, "loss": 0.6488, "step": 8954 }, { "epoch": 0.57, "grad_norm": 4.175375816828904, "learning_rate": 4.063337795110497e-06, "loss": 0.6655, "step": 8955 }, { "epoch": 0.57, "grad_norm": 1.4392688944208156, "learning_rate": 4.062319610251566e-06, "loss": 0.6874, "step": 8956 }, { "epoch": 0.57, "grad_norm": 1.6706018879802116, "learning_rate": 4.0613014656921675e-06, "loss": 0.7321, "step": 8957 }, { "epoch": 0.57, "grad_norm": 1.6827608800068872, "learning_rate": 4.0602833614760605e-06, "loss": 0.6129, "step": 8958 }, { "epoch": 0.57, "grad_norm": 1.1414376984556733, "learning_rate": 4.059265297646999e-06, "loss": 0.6021, "step": 8959 }, { "epoch": 0.57, "grad_norm": 1.7708184200061288, "learning_rate": 4.05824727424874e-06, "loss": 0.787, "step": 8960 }, { "epoch": 0.57, "grad_norm": 1.5257782481717819, "learning_rate": 4.057229291325032e-06, "loss": 0.6393, "step": 8961 }, { "epoch": 0.57, "grad_norm": 1.7317701960461864, "learning_rate": 4.056211348919629e-06, "loss": 0.5946, "step": 8962 }, { "epoch": 0.57, "grad_norm": 1.5382297844161192, "learning_rate": 4.05519344707628e-06, "loss": 0.8148, "step": 8963 }, { "epoch": 0.57, "grad_norm": 1.4897525002576042, "learning_rate": 4.054175585838729e-06, "loss": 0.6621, "step": 8964 }, { "epoch": 0.57, "grad_norm": 1.629138310612249, "learning_rate": 4.0531577652507256e-06, "loss": 0.7569, "step": 8965 }, { "epoch": 0.57, "grad_norm": 1.3869324915104917, "learning_rate": 4.0521399853560116e-06, "loss": 0.706, "step": 8966 }, { "epoch": 0.57, "grad_norm": 1.2175295047563697, "learning_rate": 4.05112224619833e-06, "loss": 0.7421, "step": 8967 }, { "epoch": 0.57, "grad_norm": 1.1101125396716498, "learning_rate": 4.050104547821419e-06, "loss": 0.6718, "step": 8968 }, { "epoch": 0.57, "grad_norm": 1.1032245298529408, "learning_rate": 4.049086890269021e-06, "loss": 0.7056, "step": 8969 }, { "epoch": 0.57, "grad_norm": 1.4388269361084487, "learning_rate": 4.048069273584867e-06, "loss": 0.6563, "step": 8970 }, { "epoch": 0.57, "grad_norm": 1.6971036073455557, "learning_rate": 4.047051697812697e-06, "loss": 0.6931, "step": 8971 }, { "epoch": 0.57, "grad_norm": 1.5994351809857494, "learning_rate": 4.046034162996242e-06, "loss": 0.7114, "step": 8972 }, { "epoch": 0.57, "grad_norm": 1.5239567284023796, "learning_rate": 4.0450166691792335e-06, "loss": 0.6621, "step": 8973 }, { "epoch": 0.57, "grad_norm": 1.5667956208395306, "learning_rate": 4.043999216405405e-06, "loss": 0.6348, "step": 8974 }, { "epoch": 0.57, "grad_norm": 1.7186803634967283, "learning_rate": 4.042981804718478e-06, "loss": 0.6572, "step": 8975 }, { "epoch": 0.57, "grad_norm": 1.7299035589775684, "learning_rate": 4.041964434162184e-06, "loss": 0.6749, "step": 8976 }, { "epoch": 0.57, "grad_norm": 1.530643873325176, "learning_rate": 4.040947104780244e-06, "loss": 0.6541, "step": 8977 }, { "epoch": 0.57, "grad_norm": 1.7892229172001017, "learning_rate": 4.039929816616383e-06, "loss": 0.6755, "step": 8978 }, { "epoch": 0.57, "grad_norm": 1.6192635742029446, "learning_rate": 4.03891256971432e-06, "loss": 0.7325, "step": 8979 }, { "epoch": 0.57, "grad_norm": 1.4987882422699428, "learning_rate": 4.037895364117778e-06, "loss": 0.7301, "step": 8980 }, { "epoch": 0.57, "grad_norm": 1.4451592551348575, "learning_rate": 4.036878199870469e-06, "loss": 0.6208, "step": 8981 }, { "epoch": 0.57, "grad_norm": 1.496980531359934, "learning_rate": 4.035861077016111e-06, "loss": 0.6544, "step": 8982 }, { "epoch": 0.57, "grad_norm": 1.7122543472852685, "learning_rate": 4.034843995598421e-06, "loss": 0.684, "step": 8983 }, { "epoch": 0.58, "grad_norm": 1.0449980983330445, "learning_rate": 4.033826955661106e-06, "loss": 0.6543, "step": 8984 }, { "epoch": 0.58, "grad_norm": 1.0678925585966397, "learning_rate": 4.032809957247878e-06, "loss": 0.6624, "step": 8985 }, { "epoch": 0.58, "grad_norm": 1.3984988074452045, "learning_rate": 4.031793000402445e-06, "loss": 0.7289, "step": 8986 }, { "epoch": 0.58, "grad_norm": 1.5892370129392661, "learning_rate": 4.030776085168516e-06, "loss": 0.6126, "step": 8987 }, { "epoch": 0.58, "grad_norm": 1.419904450859584, "learning_rate": 4.029759211589794e-06, "loss": 0.6485, "step": 8988 }, { "epoch": 0.58, "grad_norm": 1.1829555348597414, "learning_rate": 4.028742379709982e-06, "loss": 0.6236, "step": 8989 }, { "epoch": 0.58, "grad_norm": 1.2718798799469428, "learning_rate": 4.0277255895727814e-06, "loss": 0.679, "step": 8990 }, { "epoch": 0.58, "grad_norm": 1.2975228656601547, "learning_rate": 4.0267088412218906e-06, "loss": 0.6201, "step": 8991 }, { "epoch": 0.58, "grad_norm": 1.6278608426457872, "learning_rate": 4.025692134701011e-06, "loss": 0.6383, "step": 8992 }, { "epoch": 0.58, "grad_norm": 1.6157321682601014, "learning_rate": 4.024675470053836e-06, "loss": 0.726, "step": 8993 }, { "epoch": 0.58, "grad_norm": 1.499152729431702, "learning_rate": 4.023658847324058e-06, "loss": 0.6951, "step": 8994 }, { "epoch": 0.58, "grad_norm": 1.9994352053003237, "learning_rate": 4.0226422665553724e-06, "loss": 0.6772, "step": 8995 }, { "epoch": 0.58, "grad_norm": 1.3979614207873798, "learning_rate": 4.02162572779147e-06, "loss": 0.6831, "step": 8996 }, { "epoch": 0.58, "grad_norm": 1.4793115087960322, "learning_rate": 4.020609231076035e-06, "loss": 0.674, "step": 8997 }, { "epoch": 0.58, "grad_norm": 1.4300809912420243, "learning_rate": 4.019592776452759e-06, "loss": 0.5927, "step": 8998 }, { "epoch": 0.58, "grad_norm": 1.9437799966572071, "learning_rate": 4.018576363965324e-06, "loss": 0.6344, "step": 8999 }, { "epoch": 0.58, "grad_norm": 1.4840040503000549, "learning_rate": 4.017559993657416e-06, "loss": 0.6562, "step": 9000 }, { "epoch": 0.58, "grad_norm": 1.5891486110970054, "learning_rate": 4.0165436655727144e-06, "loss": 0.6533, "step": 9001 }, { "epoch": 0.58, "grad_norm": 1.4944597280717038, "learning_rate": 4.015527379754899e-06, "loss": 0.6016, "step": 9002 }, { "epoch": 0.58, "grad_norm": 1.4163146953118353, "learning_rate": 4.01451113624765e-06, "loss": 0.7221, "step": 9003 }, { "epoch": 0.58, "grad_norm": 1.4748824591719794, "learning_rate": 4.01349493509464e-06, "loss": 0.784, "step": 9004 }, { "epoch": 0.58, "grad_norm": 1.6614118648614151, "learning_rate": 4.012478776339547e-06, "loss": 0.7155, "step": 9005 }, { "epoch": 0.58, "grad_norm": 1.6904421140486852, "learning_rate": 4.011462660026038e-06, "loss": 0.7493, "step": 9006 }, { "epoch": 0.58, "grad_norm": 1.4401108928950648, "learning_rate": 4.010446586197788e-06, "loss": 0.5871, "step": 9007 }, { "epoch": 0.58, "grad_norm": 1.557397712713633, "learning_rate": 4.009430554898464e-06, "loss": 0.6929, "step": 9008 }, { "epoch": 0.58, "grad_norm": 1.550363005284889, "learning_rate": 4.008414566171733e-06, "loss": 0.6761, "step": 9009 }, { "epoch": 0.58, "grad_norm": 1.2662764989218185, "learning_rate": 4.007398620061262e-06, "loss": 0.6594, "step": 9010 }, { "epoch": 0.58, "grad_norm": 1.6616409620601424, "learning_rate": 4.006382716610711e-06, "loss": 0.7192, "step": 9011 }, { "epoch": 0.58, "grad_norm": 1.5895730132787322, "learning_rate": 4.0053668558637444e-06, "loss": 0.6819, "step": 9012 }, { "epoch": 0.58, "grad_norm": 1.5153803893210933, "learning_rate": 4.00435103786402e-06, "loss": 0.6958, "step": 9013 }, { "epoch": 0.58, "grad_norm": 1.360674227976285, "learning_rate": 4.003335262655195e-06, "loss": 0.5937, "step": 9014 }, { "epoch": 0.58, "grad_norm": 1.5717093962599349, "learning_rate": 4.002319530280925e-06, "loss": 0.7038, "step": 9015 }, { "epoch": 0.58, "grad_norm": 1.6283701788796754, "learning_rate": 4.001303840784868e-06, "loss": 0.6281, "step": 9016 }, { "epoch": 0.58, "grad_norm": 1.4619745882374273, "learning_rate": 4.000288194210671e-06, "loss": 0.6656, "step": 9017 }, { "epoch": 0.58, "grad_norm": 1.7443658280559913, "learning_rate": 3.999272590601988e-06, "loss": 0.6668, "step": 9018 }, { "epoch": 0.58, "grad_norm": 1.325672385438165, "learning_rate": 3.998257030002466e-06, "loss": 0.7308, "step": 9019 }, { "epoch": 0.58, "grad_norm": 1.630924021019278, "learning_rate": 3.9972415124557514e-06, "loss": 0.7009, "step": 9020 }, { "epoch": 0.58, "grad_norm": 1.3006760933940544, "learning_rate": 3.996226038005491e-06, "loss": 0.6265, "step": 9021 }, { "epoch": 0.58, "grad_norm": 1.583405356869618, "learning_rate": 3.995210606695324e-06, "loss": 0.7264, "step": 9022 }, { "epoch": 0.58, "grad_norm": 1.7695615685096284, "learning_rate": 3.994195218568896e-06, "loss": 0.6787, "step": 9023 }, { "epoch": 0.58, "grad_norm": 1.4450013148922902, "learning_rate": 3.9931798736698424e-06, "loss": 0.7507, "step": 9024 }, { "epoch": 0.58, "grad_norm": 1.7409390987590636, "learning_rate": 3.992164572041805e-06, "loss": 0.6894, "step": 9025 }, { "epoch": 0.58, "grad_norm": 1.0719685664152625, "learning_rate": 3.991149313728413e-06, "loss": 0.7325, "step": 9026 }, { "epoch": 0.58, "grad_norm": 1.5733959847629302, "learning_rate": 3.990134098773304e-06, "loss": 0.6096, "step": 9027 }, { "epoch": 0.58, "grad_norm": 1.5701051782964524, "learning_rate": 3.989118927220111e-06, "loss": 0.6663, "step": 9028 }, { "epoch": 0.58, "grad_norm": 1.5581370030272241, "learning_rate": 3.988103799112461e-06, "loss": 0.6667, "step": 9029 }, { "epoch": 0.58, "grad_norm": 1.0257335300830042, "learning_rate": 3.987088714493985e-06, "loss": 0.689, "step": 9030 }, { "epoch": 0.58, "grad_norm": 1.5721664049183182, "learning_rate": 3.986073673408306e-06, "loss": 0.7326, "step": 9031 }, { "epoch": 0.58, "grad_norm": 1.4013254716498613, "learning_rate": 3.985058675899052e-06, "loss": 0.6731, "step": 9032 }, { "epoch": 0.58, "grad_norm": 1.3874461506333522, "learning_rate": 3.984043722009842e-06, "loss": 0.6891, "step": 9033 }, { "epoch": 0.58, "grad_norm": 1.5649651332357208, "learning_rate": 3.983028811784298e-06, "loss": 0.785, "step": 9034 }, { "epoch": 0.58, "grad_norm": 1.3755434986689214, "learning_rate": 3.98201394526604e-06, "loss": 0.6701, "step": 9035 }, { "epoch": 0.58, "grad_norm": 1.6124053685618265, "learning_rate": 3.980999122498681e-06, "loss": 0.6502, "step": 9036 }, { "epoch": 0.58, "grad_norm": 1.5140107437952937, "learning_rate": 3.97998434352584e-06, "loss": 0.7504, "step": 9037 }, { "epoch": 0.58, "grad_norm": 1.7024127003754899, "learning_rate": 3.978969608391128e-06, "loss": 0.7669, "step": 9038 }, { "epoch": 0.58, "grad_norm": 1.4466748607068765, "learning_rate": 3.977954917138158e-06, "loss": 0.5892, "step": 9039 }, { "epoch": 0.58, "grad_norm": 1.5764996718641775, "learning_rate": 3.976940269810537e-06, "loss": 0.6787, "step": 9040 }, { "epoch": 0.58, "grad_norm": 1.0898724574529148, "learning_rate": 3.975925666451875e-06, "loss": 0.6805, "step": 9041 }, { "epoch": 0.58, "grad_norm": 1.776642654537701, "learning_rate": 3.9749111071057745e-06, "loss": 0.7674, "step": 9042 }, { "epoch": 0.58, "grad_norm": 1.485603977609759, "learning_rate": 3.973896591815841e-06, "loss": 0.6638, "step": 9043 }, { "epoch": 0.58, "grad_norm": 1.4780725724292973, "learning_rate": 3.972882120625675e-06, "loss": 0.7073, "step": 9044 }, { "epoch": 0.58, "grad_norm": 1.6213441094972487, "learning_rate": 3.971867693578879e-06, "loss": 0.8662, "step": 9045 }, { "epoch": 0.58, "grad_norm": 1.4909224648025794, "learning_rate": 3.970853310719047e-06, "loss": 0.6909, "step": 9046 }, { "epoch": 0.58, "grad_norm": 2.33043332930819, "learning_rate": 3.969838972089778e-06, "loss": 0.6434, "step": 9047 }, { "epoch": 0.58, "grad_norm": 1.5892577749864458, "learning_rate": 3.968824677734667e-06, "loss": 0.7221, "step": 9048 }, { "epoch": 0.58, "grad_norm": 1.460145874801153, "learning_rate": 3.967810427697301e-06, "loss": 0.6272, "step": 9049 }, { "epoch": 0.58, "grad_norm": 1.4771399995436838, "learning_rate": 3.9667962220212765e-06, "loss": 0.5973, "step": 9050 }, { "epoch": 0.58, "grad_norm": 2.121382804001659, "learning_rate": 3.965782060750178e-06, "loss": 0.6404, "step": 9051 }, { "epoch": 0.58, "grad_norm": 1.8828382712094347, "learning_rate": 3.964767943927596e-06, "loss": 0.6323, "step": 9052 }, { "epoch": 0.58, "grad_norm": 1.456893243895873, "learning_rate": 3.96375387159711e-06, "loss": 0.6896, "step": 9053 }, { "epoch": 0.58, "grad_norm": 1.6414205662698207, "learning_rate": 3.962739843802307e-06, "loss": 0.6734, "step": 9054 }, { "epoch": 0.58, "grad_norm": 1.6473911075659544, "learning_rate": 3.961725860586763e-06, "loss": 0.7279, "step": 9055 }, { "epoch": 0.58, "grad_norm": 1.067362675019201, "learning_rate": 3.960711921994061e-06, "loss": 0.6901, "step": 9056 }, { "epoch": 0.58, "grad_norm": 1.5406739579542064, "learning_rate": 3.9596980280677775e-06, "loss": 0.6795, "step": 9057 }, { "epoch": 0.58, "grad_norm": 1.1580007570842221, "learning_rate": 3.958684178851486e-06, "loss": 0.6557, "step": 9058 }, { "epoch": 0.58, "grad_norm": 1.6622664810731191, "learning_rate": 3.957670374388762e-06, "loss": 0.7188, "step": 9059 }, { "epoch": 0.58, "grad_norm": 1.097552568005028, "learning_rate": 3.956656614723173e-06, "loss": 0.5968, "step": 9060 }, { "epoch": 0.58, "grad_norm": 1.352111155875758, "learning_rate": 3.955642899898293e-06, "loss": 0.6756, "step": 9061 }, { "epoch": 0.58, "grad_norm": 1.4103741423483198, "learning_rate": 3.954629229957684e-06, "loss": 0.6845, "step": 9062 }, { "epoch": 0.58, "grad_norm": 1.394779321344261, "learning_rate": 3.953615604944915e-06, "loss": 0.5783, "step": 9063 }, { "epoch": 0.58, "grad_norm": 1.5611869944215007, "learning_rate": 3.952602024903548e-06, "loss": 0.6076, "step": 9064 }, { "epoch": 0.58, "grad_norm": 1.5070248960485375, "learning_rate": 3.9515884898771455e-06, "loss": 0.7011, "step": 9065 }, { "epoch": 0.58, "grad_norm": 2.23378077880887, "learning_rate": 3.950574999909267e-06, "loss": 0.6783, "step": 9066 }, { "epoch": 0.58, "grad_norm": 1.4397696342220012, "learning_rate": 3.949561555043469e-06, "loss": 0.6751, "step": 9067 }, { "epoch": 0.58, "grad_norm": 1.2165455829096068, "learning_rate": 3.94854815532331e-06, "loss": 0.7027, "step": 9068 }, { "epoch": 0.58, "grad_norm": 1.1762882369129557, "learning_rate": 3.94753480079234e-06, "loss": 0.6791, "step": 9069 }, { "epoch": 0.58, "grad_norm": 1.8224207271788664, "learning_rate": 3.9465214914941145e-06, "loss": 0.7423, "step": 9070 }, { "epoch": 0.58, "grad_norm": 1.529938394888144, "learning_rate": 3.94550822747218e-06, "loss": 0.7077, "step": 9071 }, { "epoch": 0.58, "grad_norm": 1.407259896843311, "learning_rate": 3.9444950087700866e-06, "loss": 0.7423, "step": 9072 }, { "epoch": 0.58, "grad_norm": 1.577416283598191, "learning_rate": 3.943481835431379e-06, "loss": 0.6818, "step": 9073 }, { "epoch": 0.58, "grad_norm": 1.4945931363459604, "learning_rate": 3.942468707499603e-06, "loss": 0.6421, "step": 9074 }, { "epoch": 0.58, "grad_norm": 1.7270649482871303, "learning_rate": 3.9414556250183e-06, "loss": 0.6983, "step": 9075 }, { "epoch": 0.58, "grad_norm": 1.4868120191352612, "learning_rate": 3.940442588031009e-06, "loss": 0.7176, "step": 9076 }, { "epoch": 0.58, "grad_norm": 1.4799151379997315, "learning_rate": 3.939429596581271e-06, "loss": 0.6393, "step": 9077 }, { "epoch": 0.58, "grad_norm": 1.4770216718058609, "learning_rate": 3.9384166507126185e-06, "loss": 0.618, "step": 9078 }, { "epoch": 0.58, "grad_norm": 1.3757031499346524, "learning_rate": 3.937403750468588e-06, "loss": 0.7057, "step": 9079 }, { "epoch": 0.58, "grad_norm": 1.7375095508532408, "learning_rate": 3.936390895892711e-06, "loss": 0.724, "step": 9080 }, { "epoch": 0.58, "grad_norm": 1.304810293225632, "learning_rate": 3.935378087028521e-06, "loss": 0.6196, "step": 9081 }, { "epoch": 0.58, "grad_norm": 1.8553823886367695, "learning_rate": 3.93436532391954e-06, "loss": 0.7254, "step": 9082 }, { "epoch": 0.58, "grad_norm": 1.2411971601220835, "learning_rate": 3.9333526066093e-06, "loss": 0.6463, "step": 9083 }, { "epoch": 0.58, "grad_norm": 1.4508767023640248, "learning_rate": 3.932339935141324e-06, "loss": 0.6807, "step": 9084 }, { "epoch": 0.58, "grad_norm": 1.6856633731543167, "learning_rate": 3.931327309559133e-06, "loss": 0.7695, "step": 9085 }, { "epoch": 0.58, "grad_norm": 1.6649822811025885, "learning_rate": 3.93031472990625e-06, "loss": 0.6912, "step": 9086 }, { "epoch": 0.58, "grad_norm": 1.7323692238677784, "learning_rate": 3.9293021962261906e-06, "loss": 0.6574, "step": 9087 }, { "epoch": 0.58, "grad_norm": 1.1648662682933335, "learning_rate": 3.928289708562475e-06, "loss": 0.6412, "step": 9088 }, { "epoch": 0.58, "grad_norm": 1.4888696847458303, "learning_rate": 3.927277266958614e-06, "loss": 0.7239, "step": 9089 }, { "epoch": 0.58, "grad_norm": 1.8711470185184103, "learning_rate": 3.926264871458124e-06, "loss": 0.7074, "step": 9090 }, { "epoch": 0.58, "grad_norm": 1.6271018313370043, "learning_rate": 3.925252522104512e-06, "loss": 0.6914, "step": 9091 }, { "epoch": 0.58, "grad_norm": 1.1518634487197847, "learning_rate": 3.924240218941288e-06, "loss": 0.6369, "step": 9092 }, { "epoch": 0.58, "grad_norm": 1.5990336203078042, "learning_rate": 3.923227962011959e-06, "loss": 0.6423, "step": 9093 }, { "epoch": 0.58, "grad_norm": 1.4938475082620366, "learning_rate": 3.922215751360029e-06, "loss": 0.668, "step": 9094 }, { "epoch": 0.58, "grad_norm": 1.4747803837686404, "learning_rate": 3.9212035870290035e-06, "loss": 0.6127, "step": 9095 }, { "epoch": 0.58, "grad_norm": 1.430672001888112, "learning_rate": 3.9201914690623785e-06, "loss": 0.6495, "step": 9096 }, { "epoch": 0.58, "grad_norm": 1.5466621536946927, "learning_rate": 3.919179397503659e-06, "loss": 0.6859, "step": 9097 }, { "epoch": 0.58, "grad_norm": 1.1525639690202767, "learning_rate": 3.918167372396333e-06, "loss": 0.7162, "step": 9098 }, { "epoch": 0.58, "grad_norm": 1.5706883159654763, "learning_rate": 3.917155393783903e-06, "loss": 0.7682, "step": 9099 }, { "epoch": 0.58, "grad_norm": 1.4402202156433395, "learning_rate": 3.916143461709857e-06, "loss": 0.6805, "step": 9100 }, { "epoch": 0.58, "grad_norm": 0.9740568299714379, "learning_rate": 3.9151315762176885e-06, "loss": 0.6011, "step": 9101 }, { "epoch": 0.58, "grad_norm": 1.3861444393595042, "learning_rate": 3.914119737350886e-06, "loss": 0.5856, "step": 9102 }, { "epoch": 0.58, "grad_norm": 1.6885734484596582, "learning_rate": 3.9131079451529345e-06, "loss": 0.731, "step": 9103 }, { "epoch": 0.58, "grad_norm": 1.503446140157392, "learning_rate": 3.912096199667321e-06, "loss": 0.7073, "step": 9104 }, { "epoch": 0.58, "grad_norm": 1.6400027610749794, "learning_rate": 3.9110845009375255e-06, "loss": 0.7606, "step": 9105 }, { "epoch": 0.58, "grad_norm": 1.6565223088791652, "learning_rate": 3.9100728490070305e-06, "loss": 0.7112, "step": 9106 }, { "epoch": 0.58, "grad_norm": 1.600788639797169, "learning_rate": 3.909061243919313e-06, "loss": 0.6633, "step": 9107 }, { "epoch": 0.58, "grad_norm": 1.5296114040610889, "learning_rate": 3.908049685717854e-06, "loss": 0.8005, "step": 9108 }, { "epoch": 0.58, "grad_norm": 1.35666190975043, "learning_rate": 3.907038174446122e-06, "loss": 0.6508, "step": 9109 }, { "epoch": 0.58, "grad_norm": 1.5733843010089084, "learning_rate": 3.906026710147592e-06, "loss": 0.7383, "step": 9110 }, { "epoch": 0.58, "grad_norm": 1.5691512235550904, "learning_rate": 3.905015292865738e-06, "loss": 0.6784, "step": 9111 }, { "epoch": 0.58, "grad_norm": 1.5302251442102373, "learning_rate": 3.904003922644024e-06, "loss": 0.6717, "step": 9112 }, { "epoch": 0.58, "grad_norm": 1.2111821659158406, "learning_rate": 3.902992599525921e-06, "loss": 0.7341, "step": 9113 }, { "epoch": 0.58, "grad_norm": 1.674439791134616, "learning_rate": 3.901981323554887e-06, "loss": 0.6683, "step": 9114 }, { "epoch": 0.58, "grad_norm": 1.348060849745125, "learning_rate": 3.900970094774391e-06, "loss": 0.6074, "step": 9115 }, { "epoch": 0.58, "grad_norm": 1.4251705209237486, "learning_rate": 3.899958913227889e-06, "loss": 0.6828, "step": 9116 }, { "epoch": 0.58, "grad_norm": 1.5709940430059273, "learning_rate": 3.898947778958845e-06, "loss": 0.7026, "step": 9117 }, { "epoch": 0.58, "grad_norm": 1.5989378874748998, "learning_rate": 3.897936692010708e-06, "loss": 0.6846, "step": 9118 }, { "epoch": 0.58, "grad_norm": 1.439301815938755, "learning_rate": 3.8969256524269395e-06, "loss": 0.7197, "step": 9119 }, { "epoch": 0.58, "grad_norm": 1.5574087932623808, "learning_rate": 3.8959146602509865e-06, "loss": 0.7392, "step": 9120 }, { "epoch": 0.58, "grad_norm": 1.4792706756813627, "learning_rate": 3.894903715526301e-06, "loss": 0.6717, "step": 9121 }, { "epoch": 0.58, "grad_norm": 1.632517551832119, "learning_rate": 3.893892818296333e-06, "loss": 0.7063, "step": 9122 }, { "epoch": 0.58, "grad_norm": 1.0151215605204114, "learning_rate": 3.892881968604525e-06, "loss": 0.5921, "step": 9123 }, { "epoch": 0.58, "grad_norm": 2.3494647362031458, "learning_rate": 3.891871166494327e-06, "loss": 0.7462, "step": 9124 }, { "epoch": 0.58, "grad_norm": 1.7533886090126363, "learning_rate": 3.890860412009176e-06, "loss": 0.6912, "step": 9125 }, { "epoch": 0.58, "grad_norm": 1.3973365572584724, "learning_rate": 3.889849705192515e-06, "loss": 0.7031, "step": 9126 }, { "epoch": 0.58, "grad_norm": 1.6371365297146545, "learning_rate": 3.888839046087779e-06, "loss": 0.599, "step": 9127 }, { "epoch": 0.58, "grad_norm": 1.9236312734036523, "learning_rate": 3.887828434738408e-06, "loss": 0.7511, "step": 9128 }, { "epoch": 0.58, "grad_norm": 1.369243851633979, "learning_rate": 3.886817871187832e-06, "loss": 0.6623, "step": 9129 }, { "epoch": 0.58, "grad_norm": 1.5362357069867092, "learning_rate": 3.885807355479485e-06, "loss": 0.7506, "step": 9130 }, { "epoch": 0.58, "grad_norm": 1.573678896422022, "learning_rate": 3.8847968876567985e-06, "loss": 0.575, "step": 9131 }, { "epoch": 0.58, "grad_norm": 1.5074025818449397, "learning_rate": 3.883786467763196e-06, "loss": 0.6798, "step": 9132 }, { "epoch": 0.58, "grad_norm": 1.5352974262084482, "learning_rate": 3.882776095842108e-06, "loss": 0.6481, "step": 9133 }, { "epoch": 0.58, "grad_norm": 1.422425180646882, "learning_rate": 3.881765771936954e-06, "loss": 0.6621, "step": 9134 }, { "epoch": 0.58, "grad_norm": 1.5992333320381182, "learning_rate": 3.880755496091158e-06, "loss": 0.7074, "step": 9135 }, { "epoch": 0.58, "grad_norm": 1.4378931010775844, "learning_rate": 3.879745268348139e-06, "loss": 0.8787, "step": 9136 }, { "epoch": 0.58, "grad_norm": 1.7143464100739605, "learning_rate": 3.878735088751315e-06, "loss": 0.6592, "step": 9137 }, { "epoch": 0.58, "grad_norm": 1.785098234314245, "learning_rate": 3.877724957344099e-06, "loss": 0.7341, "step": 9138 }, { "epoch": 0.58, "grad_norm": 1.7094503822320166, "learning_rate": 3.876714874169906e-06, "loss": 0.7115, "step": 9139 }, { "epoch": 0.59, "grad_norm": 1.402488866714677, "learning_rate": 3.87570483927215e-06, "loss": 0.6802, "step": 9140 }, { "epoch": 0.59, "grad_norm": 1.304222889232047, "learning_rate": 3.874694852694236e-06, "loss": 0.6447, "step": 9141 }, { "epoch": 0.59, "grad_norm": 1.3549231019173165, "learning_rate": 3.873684914479572e-06, "loss": 0.6598, "step": 9142 }, { "epoch": 0.59, "grad_norm": 1.5881783257471482, "learning_rate": 3.8726750246715635e-06, "loss": 0.6323, "step": 9143 }, { "epoch": 0.59, "grad_norm": 1.4206723574095241, "learning_rate": 3.871665183313617e-06, "loss": 0.6565, "step": 9144 }, { "epoch": 0.59, "grad_norm": 1.7593680721459775, "learning_rate": 3.870655390449126e-06, "loss": 0.6324, "step": 9145 }, { "epoch": 0.59, "grad_norm": 0.978881486289268, "learning_rate": 3.869645646121496e-06, "loss": 0.5748, "step": 9146 }, { "epoch": 0.59, "grad_norm": 1.6794801924370748, "learning_rate": 3.868635950374119e-06, "loss": 0.7747, "step": 9147 }, { "epoch": 0.59, "grad_norm": 1.41778737345989, "learning_rate": 3.867626303250392e-06, "loss": 0.6372, "step": 9148 }, { "epoch": 0.59, "grad_norm": 1.7374950471769064, "learning_rate": 3.866616704793706e-06, "loss": 0.6686, "step": 9149 }, { "epoch": 0.59, "grad_norm": 1.7103466504249756, "learning_rate": 3.865607155047455e-06, "loss": 0.6709, "step": 9150 }, { "epoch": 0.59, "grad_norm": 1.5099264621550803, "learning_rate": 3.864597654055024e-06, "loss": 0.7207, "step": 9151 }, { "epoch": 0.59, "grad_norm": 1.678785569269383, "learning_rate": 3.863588201859798e-06, "loss": 0.7029, "step": 9152 }, { "epoch": 0.59, "grad_norm": 1.2881088364558706, "learning_rate": 3.862578798505166e-06, "loss": 0.6381, "step": 9153 }, { "epoch": 0.59, "grad_norm": 1.5655851067197608, "learning_rate": 3.861569444034505e-06, "loss": 0.6772, "step": 9154 }, { "epoch": 0.59, "grad_norm": 1.9377563925722985, "learning_rate": 3.860560138491199e-06, "loss": 0.7143, "step": 9155 }, { "epoch": 0.59, "grad_norm": 1.4989707877453873, "learning_rate": 3.859550881918622e-06, "loss": 0.6501, "step": 9156 }, { "epoch": 0.59, "grad_norm": 1.8052066779497997, "learning_rate": 3.858541674360151e-06, "loss": 0.7562, "step": 9157 }, { "epoch": 0.59, "grad_norm": 1.1023403748895317, "learning_rate": 3.857532515859163e-06, "loss": 0.5587, "step": 9158 }, { "epoch": 0.59, "grad_norm": 1.7376692161236702, "learning_rate": 3.856523406459025e-06, "loss": 0.6921, "step": 9159 }, { "epoch": 0.59, "grad_norm": 1.458568663356173, "learning_rate": 3.85551434620311e-06, "loss": 0.7256, "step": 9160 }, { "epoch": 0.59, "grad_norm": 1.5000066785211124, "learning_rate": 3.854505335134781e-06, "loss": 0.6173, "step": 9161 }, { "epoch": 0.59, "grad_norm": 1.6965396982412237, "learning_rate": 3.853496373297408e-06, "loss": 0.6806, "step": 9162 }, { "epoch": 0.59, "grad_norm": 1.4986761970721523, "learning_rate": 3.852487460734351e-06, "loss": 0.6357, "step": 9163 }, { "epoch": 0.59, "grad_norm": 1.2152387902021593, "learning_rate": 3.8514785974889714e-06, "loss": 0.623, "step": 9164 }, { "epoch": 0.59, "grad_norm": 1.5104572801962448, "learning_rate": 3.850469783604628e-06, "loss": 0.579, "step": 9165 }, { "epoch": 0.59, "grad_norm": 1.62111867490895, "learning_rate": 3.849461019124678e-06, "loss": 0.6824, "step": 9166 }, { "epoch": 0.59, "grad_norm": 1.6212276649828876, "learning_rate": 3.848452304092477e-06, "loss": 0.7229, "step": 9167 }, { "epoch": 0.59, "grad_norm": 1.5795310698082603, "learning_rate": 3.8474436385513735e-06, "loss": 0.6998, "step": 9168 }, { "epoch": 0.59, "grad_norm": 1.460148486509596, "learning_rate": 3.846435022544724e-06, "loss": 0.6596, "step": 9169 }, { "epoch": 0.59, "grad_norm": 1.3380818796743243, "learning_rate": 3.845426456115871e-06, "loss": 0.6615, "step": 9170 }, { "epoch": 0.59, "grad_norm": 1.7683684697635682, "learning_rate": 3.844417939308163e-06, "loss": 0.7635, "step": 9171 }, { "epoch": 0.59, "grad_norm": 1.3830421357258342, "learning_rate": 3.8434094721649435e-06, "loss": 0.5528, "step": 9172 }, { "epoch": 0.59, "grad_norm": 1.5585607589789798, "learning_rate": 3.842401054729557e-06, "loss": 0.6404, "step": 9173 }, { "epoch": 0.59, "grad_norm": 1.5598696734093327, "learning_rate": 3.841392687045338e-06, "loss": 0.617, "step": 9174 }, { "epoch": 0.59, "grad_norm": 1.6409798415982295, "learning_rate": 3.840384369155626e-06, "loss": 0.6994, "step": 9175 }, { "epoch": 0.59, "grad_norm": 1.8545003846692742, "learning_rate": 3.83937610110376e-06, "loss": 0.6081, "step": 9176 }, { "epoch": 0.59, "grad_norm": 1.5442251853397306, "learning_rate": 3.838367882933068e-06, "loss": 0.6907, "step": 9177 }, { "epoch": 0.59, "grad_norm": 1.5688626946936897, "learning_rate": 3.837359714686885e-06, "loss": 0.6589, "step": 9178 }, { "epoch": 0.59, "grad_norm": 1.44709268572441, "learning_rate": 3.836351596408537e-06, "loss": 0.6118, "step": 9179 }, { "epoch": 0.59, "grad_norm": 1.498758396022278, "learning_rate": 3.835343528141355e-06, "loss": 0.6243, "step": 9180 }, { "epoch": 0.59, "grad_norm": 1.5535876328863432, "learning_rate": 3.834335509928658e-06, "loss": 0.725, "step": 9181 }, { "epoch": 0.59, "grad_norm": 1.534558269753094, "learning_rate": 3.833327541813774e-06, "loss": 0.7428, "step": 9182 }, { "epoch": 0.59, "grad_norm": 1.3101876733745497, "learning_rate": 3.832319623840018e-06, "loss": 0.6865, "step": 9183 }, { "epoch": 0.59, "grad_norm": 1.6148551648089344, "learning_rate": 3.831311756050712e-06, "loss": 0.6294, "step": 9184 }, { "epoch": 0.59, "grad_norm": 1.5693541926651122, "learning_rate": 3.830303938489172e-06, "loss": 0.619, "step": 9185 }, { "epoch": 0.59, "grad_norm": 1.995200646517762, "learning_rate": 3.829296171198709e-06, "loss": 0.666, "step": 9186 }, { "epoch": 0.59, "grad_norm": 1.992171379825233, "learning_rate": 3.8282884542226385e-06, "loss": 0.6822, "step": 9187 }, { "epoch": 0.59, "grad_norm": 1.685504682888567, "learning_rate": 3.827280787604266e-06, "loss": 0.7282, "step": 9188 }, { "epoch": 0.59, "grad_norm": 1.4393485147922453, "learning_rate": 3.826273171386904e-06, "loss": 0.6921, "step": 9189 }, { "epoch": 0.59, "grad_norm": 1.5700802858575613, "learning_rate": 3.825265605613852e-06, "loss": 0.6478, "step": 9190 }, { "epoch": 0.59, "grad_norm": 1.5912453060110698, "learning_rate": 3.8242580903284186e-06, "loss": 0.6944, "step": 9191 }, { "epoch": 0.59, "grad_norm": 1.4454610600484021, "learning_rate": 3.823250625573899e-06, "loss": 0.6857, "step": 9192 }, { "epoch": 0.59, "grad_norm": 1.5999912551594566, "learning_rate": 3.822243211393596e-06, "loss": 0.7266, "step": 9193 }, { "epoch": 0.59, "grad_norm": 1.585636250124729, "learning_rate": 3.821235847830804e-06, "loss": 0.6585, "step": 9194 }, { "epoch": 0.59, "grad_norm": 1.8343730250406287, "learning_rate": 3.820228534928818e-06, "loss": 0.7103, "step": 9195 }, { "epoch": 0.59, "grad_norm": 1.8489740493359437, "learning_rate": 3.819221272730933e-06, "loss": 0.7329, "step": 9196 }, { "epoch": 0.59, "grad_norm": 1.539692326252725, "learning_rate": 3.818214061280435e-06, "loss": 0.6189, "step": 9197 }, { "epoch": 0.59, "grad_norm": 1.4884234721987646, "learning_rate": 3.817206900620615e-06, "loss": 0.6676, "step": 9198 }, { "epoch": 0.59, "grad_norm": 1.5855213544522904, "learning_rate": 3.816199790794754e-06, "loss": 0.6916, "step": 9199 }, { "epoch": 0.59, "grad_norm": 1.9650488253630658, "learning_rate": 3.815192731846141e-06, "loss": 0.6938, "step": 9200 }, { "epoch": 0.59, "grad_norm": 1.7560949056247306, "learning_rate": 3.8141857238180537e-06, "loss": 0.6519, "step": 9201 }, { "epoch": 0.59, "grad_norm": 1.5426119663190636, "learning_rate": 3.8131787667537734e-06, "loss": 0.5983, "step": 9202 }, { "epoch": 0.59, "grad_norm": 1.3325946666897492, "learning_rate": 3.812171860696574e-06, "loss": 0.591, "step": 9203 }, { "epoch": 0.59, "grad_norm": 1.0458803056856762, "learning_rate": 3.811165005689732e-06, "loss": 0.685, "step": 9204 }, { "epoch": 0.59, "grad_norm": 1.3900827316818753, "learning_rate": 3.810158201776523e-06, "loss": 0.5794, "step": 9205 }, { "epoch": 0.59, "grad_norm": 1.9798329862652453, "learning_rate": 3.8091514490002108e-06, "loss": 0.6939, "step": 9206 }, { "epoch": 0.59, "grad_norm": 1.6184130837954347, "learning_rate": 3.808144747404069e-06, "loss": 0.76, "step": 9207 }, { "epoch": 0.59, "grad_norm": 1.6580894976636114, "learning_rate": 3.8071380970313597e-06, "loss": 0.7054, "step": 9208 }, { "epoch": 0.59, "grad_norm": 1.4825909341134742, "learning_rate": 3.806131497925351e-06, "loss": 0.6499, "step": 9209 }, { "epoch": 0.59, "grad_norm": 1.7384982114717253, "learning_rate": 3.8051249501293e-06, "loss": 0.6124, "step": 9210 }, { "epoch": 0.59, "grad_norm": 1.5043721329937938, "learning_rate": 3.8041184536864694e-06, "loss": 0.6915, "step": 9211 }, { "epoch": 0.59, "grad_norm": 1.552314553111717, "learning_rate": 3.803112008640113e-06, "loss": 0.6565, "step": 9212 }, { "epoch": 0.59, "grad_norm": 1.541725562181317, "learning_rate": 3.802105615033488e-06, "loss": 0.7018, "step": 9213 }, { "epoch": 0.59, "grad_norm": 1.7405880530411004, "learning_rate": 3.8010992729098466e-06, "loss": 0.6451, "step": 9214 }, { "epoch": 0.59, "grad_norm": 0.9898320844883385, "learning_rate": 3.8000929823124387e-06, "loss": 0.6392, "step": 9215 }, { "epoch": 0.59, "grad_norm": 1.4139704571851557, "learning_rate": 3.799086743284515e-06, "loss": 0.6243, "step": 9216 }, { "epoch": 0.59, "grad_norm": 1.6163840404279435, "learning_rate": 3.798080555869318e-06, "loss": 0.6828, "step": 9217 }, { "epoch": 0.59, "grad_norm": 1.6338269609772895, "learning_rate": 3.7970744201100944e-06, "loss": 0.6924, "step": 9218 }, { "epoch": 0.59, "grad_norm": 1.5031655182554036, "learning_rate": 3.796068336050083e-06, "loss": 0.6466, "step": 9219 }, { "epoch": 0.59, "grad_norm": 1.5194895559086157, "learning_rate": 3.795062303732525e-06, "loss": 0.7002, "step": 9220 }, { "epoch": 0.59, "grad_norm": 1.431533609755401, "learning_rate": 3.7940563232006573e-06, "loss": 0.6756, "step": 9221 }, { "epoch": 0.59, "grad_norm": 1.4693819039043978, "learning_rate": 3.7930503944977153e-06, "loss": 0.6666, "step": 9222 }, { "epoch": 0.59, "grad_norm": 1.359757384256402, "learning_rate": 3.792044517666931e-06, "loss": 0.6692, "step": 9223 }, { "epoch": 0.59, "grad_norm": 1.3566271321467105, "learning_rate": 3.7910386927515346e-06, "loss": 0.6311, "step": 9224 }, { "epoch": 0.59, "grad_norm": 1.4211138782296402, "learning_rate": 3.790032919794757e-06, "loss": 0.6463, "step": 9225 }, { "epoch": 0.59, "grad_norm": 1.5048184192355483, "learning_rate": 3.7890271988398186e-06, "loss": 0.655, "step": 9226 }, { "epoch": 0.59, "grad_norm": 2.2418481391555165, "learning_rate": 3.788021529929949e-06, "loss": 0.7086, "step": 9227 }, { "epoch": 0.59, "grad_norm": 1.1208539391421286, "learning_rate": 3.7870159131083668e-06, "loss": 0.6299, "step": 9228 }, { "epoch": 0.59, "grad_norm": 1.7006649881015006, "learning_rate": 3.786010348418292e-06, "loss": 0.715, "step": 9229 }, { "epoch": 0.59, "grad_norm": 1.6962665971410607, "learning_rate": 3.78500483590294e-06, "loss": 0.6624, "step": 9230 }, { "epoch": 0.59, "grad_norm": 1.5896023668852894, "learning_rate": 3.783999375605527e-06, "loss": 0.5999, "step": 9231 }, { "epoch": 0.59, "grad_norm": 1.423435666257755, "learning_rate": 3.7829939675692683e-06, "loss": 0.6488, "step": 9232 }, { "epoch": 0.59, "grad_norm": 1.494990079140442, "learning_rate": 3.7819886118373694e-06, "loss": 0.7038, "step": 9233 }, { "epoch": 0.59, "grad_norm": 1.4334953389745457, "learning_rate": 3.7809833084530427e-06, "loss": 0.6605, "step": 9234 }, { "epoch": 0.59, "grad_norm": 1.5933755830909446, "learning_rate": 3.77997805745949e-06, "loss": 0.6304, "step": 9235 }, { "epoch": 0.59, "grad_norm": 1.3723340855858241, "learning_rate": 3.7789728588999176e-06, "loss": 0.6761, "step": 9236 }, { "epoch": 0.59, "grad_norm": 1.4619576947270654, "learning_rate": 3.7779677128175247e-06, "loss": 0.6425, "step": 9237 }, { "epoch": 0.59, "grad_norm": 1.6131476426227767, "learning_rate": 3.7769626192555143e-06, "loss": 0.6659, "step": 9238 }, { "epoch": 0.59, "grad_norm": 1.823926973859333, "learning_rate": 3.775957578257078e-06, "loss": 0.6327, "step": 9239 }, { "epoch": 0.59, "grad_norm": 1.5748948212549583, "learning_rate": 3.774952589865413e-06, "loss": 0.6044, "step": 9240 }, { "epoch": 0.59, "grad_norm": 1.6427061600932726, "learning_rate": 3.7739476541237137e-06, "loss": 0.6426, "step": 9241 }, { "epoch": 0.59, "grad_norm": 1.6647300867927495, "learning_rate": 3.7729427710751654e-06, "loss": 0.7131, "step": 9242 }, { "epoch": 0.59, "grad_norm": 1.4115942094516287, "learning_rate": 3.7719379407629597e-06, "loss": 0.6689, "step": 9243 }, { "epoch": 0.59, "grad_norm": 1.5835921549145653, "learning_rate": 3.77093316323028e-06, "loss": 0.6467, "step": 9244 }, { "epoch": 0.59, "grad_norm": 1.6605114027176187, "learning_rate": 3.769928438520312e-06, "loss": 0.6001, "step": 9245 }, { "epoch": 0.59, "grad_norm": 1.501065177951705, "learning_rate": 3.7689237666762335e-06, "loss": 0.6335, "step": 9246 }, { "epoch": 0.59, "grad_norm": 1.7191360610064186, "learning_rate": 3.7679191477412263e-06, "loss": 0.6886, "step": 9247 }, { "epoch": 0.59, "grad_norm": 3.240378423715802, "learning_rate": 3.7669145817584635e-06, "loss": 0.7095, "step": 9248 }, { "epoch": 0.59, "grad_norm": 1.5926387337689665, "learning_rate": 3.7659100687711205e-06, "loss": 0.7237, "step": 9249 }, { "epoch": 0.59, "grad_norm": 1.6261294359293734, "learning_rate": 3.7649056088223705e-06, "loss": 0.6206, "step": 9250 }, { "epoch": 0.59, "grad_norm": 1.4954128661305213, "learning_rate": 3.7639012019553813e-06, "loss": 0.6929, "step": 9251 }, { "epoch": 0.59, "grad_norm": 1.3321265002754445, "learning_rate": 3.762896848213323e-06, "loss": 0.6678, "step": 9252 }, { "epoch": 0.59, "grad_norm": 1.4891646728576906, "learning_rate": 3.761892547639357e-06, "loss": 0.6979, "step": 9253 }, { "epoch": 0.59, "grad_norm": 1.7218859077051771, "learning_rate": 3.7608883002766496e-06, "loss": 0.7009, "step": 9254 }, { "epoch": 0.59, "grad_norm": 0.9432235200494048, "learning_rate": 3.7598841061683566e-06, "loss": 0.5683, "step": 9255 }, { "epoch": 0.59, "grad_norm": 1.3597825314256107, "learning_rate": 3.758879965357641e-06, "loss": 0.6386, "step": 9256 }, { "epoch": 0.59, "grad_norm": 1.6046312572978167, "learning_rate": 3.7578758778876546e-06, "loss": 0.5841, "step": 9257 }, { "epoch": 0.59, "grad_norm": 1.8747062758351418, "learning_rate": 3.7568718438015532e-06, "loss": 0.6528, "step": 9258 }, { "epoch": 0.59, "grad_norm": 1.0468820798043315, "learning_rate": 3.75586786314249e-06, "loss": 0.6473, "step": 9259 }, { "epoch": 0.59, "grad_norm": 1.4878235905791266, "learning_rate": 3.754863935953609e-06, "loss": 0.6366, "step": 9260 }, { "epoch": 0.59, "grad_norm": 1.577761231629777, "learning_rate": 3.753860062278063e-06, "loss": 0.6311, "step": 9261 }, { "epoch": 0.59, "grad_norm": 1.5057082224089025, "learning_rate": 3.7528562421589898e-06, "loss": 0.6561, "step": 9262 }, { "epoch": 0.59, "grad_norm": 1.437535620382076, "learning_rate": 3.7518524756395362e-06, "loss": 0.6713, "step": 9263 }, { "epoch": 0.59, "grad_norm": 1.5522980408834044, "learning_rate": 3.750848762762841e-06, "loss": 0.6264, "step": 9264 }, { "epoch": 0.59, "grad_norm": 1.8832169049429852, "learning_rate": 3.74984510357204e-06, "loss": 0.7036, "step": 9265 }, { "epoch": 0.59, "grad_norm": 1.5015296159252365, "learning_rate": 3.7488414981102693e-06, "loss": 0.6941, "step": 9266 }, { "epoch": 0.59, "grad_norm": 1.4771443925193475, "learning_rate": 3.747837946420664e-06, "loss": 0.6816, "step": 9267 }, { "epoch": 0.59, "grad_norm": 1.1935649151872103, "learning_rate": 3.7468344485463505e-06, "loss": 0.6127, "step": 9268 }, { "epoch": 0.59, "grad_norm": 1.5392608620382662, "learning_rate": 3.7458310045304586e-06, "loss": 0.6376, "step": 9269 }, { "epoch": 0.59, "grad_norm": 1.6252709656773987, "learning_rate": 3.7448276144161174e-06, "loss": 0.7652, "step": 9270 }, { "epoch": 0.59, "grad_norm": 2.0556952169758342, "learning_rate": 3.7438242782464453e-06, "loss": 0.8004, "step": 9271 }, { "epoch": 0.59, "grad_norm": 1.4203014773879252, "learning_rate": 3.742820996064568e-06, "loss": 0.6477, "step": 9272 }, { "epoch": 0.59, "grad_norm": 1.918509508045296, "learning_rate": 3.7418177679136005e-06, "loss": 0.8297, "step": 9273 }, { "epoch": 0.59, "grad_norm": 1.5170038637939927, "learning_rate": 3.740814593836664e-06, "loss": 0.6655, "step": 9274 }, { "epoch": 0.59, "grad_norm": 2.1848794718294, "learning_rate": 3.7398114738768686e-06, "loss": 0.6802, "step": 9275 }, { "epoch": 0.59, "grad_norm": 1.6073423604543924, "learning_rate": 3.73880840807733e-06, "loss": 0.6833, "step": 9276 }, { "epoch": 0.59, "grad_norm": 1.7368893586281475, "learning_rate": 3.7378053964811534e-06, "loss": 0.7522, "step": 9277 }, { "epoch": 0.59, "grad_norm": 1.4115628018974518, "learning_rate": 3.736802439131449e-06, "loss": 0.7073, "step": 9278 }, { "epoch": 0.59, "grad_norm": 1.5775372170228839, "learning_rate": 3.735799536071322e-06, "loss": 0.6553, "step": 9279 }, { "epoch": 0.59, "grad_norm": 1.6765527858072284, "learning_rate": 3.734796687343874e-06, "loss": 0.7145, "step": 9280 }, { "epoch": 0.59, "grad_norm": 1.6606742092298545, "learning_rate": 3.733793892992208e-06, "loss": 0.7389, "step": 9281 }, { "epoch": 0.59, "grad_norm": 1.0480310962348995, "learning_rate": 3.7327911530594173e-06, "loss": 0.658, "step": 9282 }, { "epoch": 0.59, "grad_norm": 1.5437598125675591, "learning_rate": 3.7317884675886025e-06, "loss": 0.6185, "step": 9283 }, { "epoch": 0.59, "grad_norm": 1.5470989855348627, "learning_rate": 3.7307858366228523e-06, "loss": 0.6199, "step": 9284 }, { "epoch": 0.59, "grad_norm": 1.6994461217811998, "learning_rate": 3.7297832602052608e-06, "loss": 0.7362, "step": 9285 }, { "epoch": 0.59, "grad_norm": 1.4128288734881076, "learning_rate": 3.7287807383789143e-06, "loss": 0.6928, "step": 9286 }, { "epoch": 0.59, "grad_norm": 2.0102264777137147, "learning_rate": 3.727778271186901e-06, "loss": 0.6838, "step": 9287 }, { "epoch": 0.59, "grad_norm": 1.5552212642023886, "learning_rate": 3.7267758586723065e-06, "loss": 0.7314, "step": 9288 }, { "epoch": 0.59, "grad_norm": 1.1429245407042719, "learning_rate": 3.7257735008782076e-06, "loss": 0.7584, "step": 9289 }, { "epoch": 0.59, "grad_norm": 1.5965616857182396, "learning_rate": 3.7247711978476885e-06, "loss": 0.6732, "step": 9290 }, { "epoch": 0.59, "grad_norm": 1.3604991291505721, "learning_rate": 3.7237689496238217e-06, "loss": 0.6161, "step": 9291 }, { "epoch": 0.59, "grad_norm": 1.5889351362936543, "learning_rate": 3.722766756249684e-06, "loss": 0.6507, "step": 9292 }, { "epoch": 0.59, "grad_norm": 1.5552255994083108, "learning_rate": 3.721764617768347e-06, "loss": 0.6223, "step": 9293 }, { "epoch": 0.59, "grad_norm": 1.415124725854275, "learning_rate": 3.7207625342228826e-06, "loss": 0.6268, "step": 9294 }, { "epoch": 0.59, "grad_norm": 1.4323697069183328, "learning_rate": 3.7197605056563545e-06, "loss": 0.698, "step": 9295 }, { "epoch": 0.6, "grad_norm": 1.826240317364628, "learning_rate": 3.7187585321118293e-06, "loss": 0.5755, "step": 9296 }, { "epoch": 0.6, "grad_norm": 1.0517747389573742, "learning_rate": 3.7177566136323726e-06, "loss": 0.6181, "step": 9297 }, { "epoch": 0.6, "grad_norm": 1.6086536880535507, "learning_rate": 3.71675475026104e-06, "loss": 0.6842, "step": 9298 }, { "epoch": 0.6, "grad_norm": 1.5428109562797412, "learning_rate": 3.7157529420408933e-06, "loss": 0.5822, "step": 9299 }, { "epoch": 0.6, "grad_norm": 0.9712581150012028, "learning_rate": 3.7147511890149864e-06, "loss": 0.6154, "step": 9300 }, { "epoch": 0.6, "grad_norm": 1.8255379605159636, "learning_rate": 3.7137494912263723e-06, "loss": 0.6722, "step": 9301 }, { "epoch": 0.6, "grad_norm": 1.4263921142342726, "learning_rate": 3.7127478487181014e-06, "loss": 0.6519, "step": 9302 }, { "epoch": 0.6, "grad_norm": 1.7107563994438941, "learning_rate": 3.7117462615332254e-06, "loss": 0.6256, "step": 9303 }, { "epoch": 0.6, "grad_norm": 1.53390435027697, "learning_rate": 3.710744729714786e-06, "loss": 0.7521, "step": 9304 }, { "epoch": 0.6, "grad_norm": 1.3773259177438968, "learning_rate": 3.7097432533058288e-06, "loss": 0.5773, "step": 9305 }, { "epoch": 0.6, "grad_norm": 1.4898596224216265, "learning_rate": 3.708741832349397e-06, "loss": 0.7434, "step": 9306 }, { "epoch": 0.6, "grad_norm": 1.5975427099602921, "learning_rate": 3.7077404668885263e-06, "loss": 0.7467, "step": 9307 }, { "epoch": 0.6, "grad_norm": 1.6828028756239242, "learning_rate": 3.7067391569662564e-06, "loss": 0.6978, "step": 9308 }, { "epoch": 0.6, "grad_norm": 1.793171714857231, "learning_rate": 3.7057379026256185e-06, "loss": 0.7096, "step": 9309 }, { "epoch": 0.6, "grad_norm": 1.4382066833487113, "learning_rate": 3.7047367039096483e-06, "loss": 0.6191, "step": 9310 }, { "epoch": 0.6, "grad_norm": 1.4065914214938235, "learning_rate": 3.7037355608613703e-06, "loss": 0.7167, "step": 9311 }, { "epoch": 0.6, "grad_norm": 2.223186115474309, "learning_rate": 3.7027344735238168e-06, "loss": 0.6765, "step": 9312 }, { "epoch": 0.6, "grad_norm": 1.5032571396405592, "learning_rate": 3.701733441940007e-06, "loss": 0.696, "step": 9313 }, { "epoch": 0.6, "grad_norm": 1.4750244382479656, "learning_rate": 3.7007324661529663e-06, "loss": 0.7134, "step": 9314 }, { "epoch": 0.6, "grad_norm": 1.4644752447909188, "learning_rate": 3.699731546205715e-06, "loss": 0.67, "step": 9315 }, { "epoch": 0.6, "grad_norm": 1.6291989255008434, "learning_rate": 3.698730682141268e-06, "loss": 0.6668, "step": 9316 }, { "epoch": 0.6, "grad_norm": 1.4601510758064307, "learning_rate": 3.6977298740026448e-06, "loss": 0.651, "step": 9317 }, { "epoch": 0.6, "grad_norm": 1.1346869842313612, "learning_rate": 3.6967291218328525e-06, "loss": 0.6459, "step": 9318 }, { "epoch": 0.6, "grad_norm": 1.4916072176301196, "learning_rate": 3.695728425674906e-06, "loss": 0.6039, "step": 9319 }, { "epoch": 0.6, "grad_norm": 1.6025664425978037, "learning_rate": 3.69472778557181e-06, "loss": 0.6829, "step": 9320 }, { "epoch": 0.6, "grad_norm": 1.611072175128888, "learning_rate": 3.693727201566571e-06, "loss": 0.6943, "step": 9321 }, { "epoch": 0.6, "grad_norm": 2.2768746812972998, "learning_rate": 3.6927266737021915e-06, "loss": 0.6692, "step": 9322 }, { "epoch": 0.6, "grad_norm": 1.5942213504834357, "learning_rate": 3.6917262020216727e-06, "loss": 0.6702, "step": 9323 }, { "epoch": 0.6, "grad_norm": 1.3627905227102552, "learning_rate": 3.6907257865680146e-06, "loss": 0.7013, "step": 9324 }, { "epoch": 0.6, "grad_norm": 1.4860721375834631, "learning_rate": 3.6897254273842087e-06, "loss": 0.6025, "step": 9325 }, { "epoch": 0.6, "grad_norm": 1.8165945535092864, "learning_rate": 3.688725124513253e-06, "loss": 0.7444, "step": 9326 }, { "epoch": 0.6, "grad_norm": 1.4392902002414019, "learning_rate": 3.6877248779981332e-06, "loss": 0.6576, "step": 9327 }, { "epoch": 0.6, "grad_norm": 1.473216781298626, "learning_rate": 3.686724687881843e-06, "loss": 0.5718, "step": 9328 }, { "epoch": 0.6, "grad_norm": 1.582196219284536, "learning_rate": 3.685724554207365e-06, "loss": 0.5966, "step": 9329 }, { "epoch": 0.6, "grad_norm": 1.5961363267841429, "learning_rate": 3.684724477017685e-06, "loss": 0.6659, "step": 9330 }, { "epoch": 0.6, "grad_norm": 1.4713622026281064, "learning_rate": 3.6837244563557815e-06, "loss": 0.6596, "step": 9331 }, { "epoch": 0.6, "grad_norm": 1.395160820637658, "learning_rate": 3.6827244922646354e-06, "loss": 0.6963, "step": 9332 }, { "epoch": 0.6, "grad_norm": 1.7123399488080215, "learning_rate": 3.6817245847872253e-06, "loss": 0.8153, "step": 9333 }, { "epoch": 0.6, "grad_norm": 1.0904568161547545, "learning_rate": 3.6807247339665192e-06, "loss": 0.7131, "step": 9334 }, { "epoch": 0.6, "grad_norm": 1.4042624822276182, "learning_rate": 3.679724939845494e-06, "loss": 0.7311, "step": 9335 }, { "epoch": 0.6, "grad_norm": 1.7025636159327293, "learning_rate": 3.6787252024671143e-06, "loss": 0.7282, "step": 9336 }, { "epoch": 0.6, "grad_norm": 1.659126747929657, "learning_rate": 3.677725521874352e-06, "loss": 0.686, "step": 9337 }, { "epoch": 0.6, "grad_norm": 1.726526912201757, "learning_rate": 3.6767258981101655e-06, "loss": 0.6752, "step": 9338 }, { "epoch": 0.6, "grad_norm": 1.3755760395193162, "learning_rate": 3.6757263312175216e-06, "loss": 0.6899, "step": 9339 }, { "epoch": 0.6, "grad_norm": 1.6411307024647526, "learning_rate": 3.674726821239376e-06, "loss": 0.6424, "step": 9340 }, { "epoch": 0.6, "grad_norm": 1.4522285774729389, "learning_rate": 3.673727368218687e-06, "loss": 0.7001, "step": 9341 }, { "epoch": 0.6, "grad_norm": 1.4046424361350625, "learning_rate": 3.6727279721984093e-06, "loss": 0.7071, "step": 9342 }, { "epoch": 0.6, "grad_norm": 1.3250289366618921, "learning_rate": 3.671728633221493e-06, "loss": 0.6715, "step": 9343 }, { "epoch": 0.6, "grad_norm": 1.8119305720661028, "learning_rate": 3.6707293513308906e-06, "loss": 0.6357, "step": 9344 }, { "epoch": 0.6, "grad_norm": 1.729221265014689, "learning_rate": 3.669730126569546e-06, "loss": 0.6603, "step": 9345 }, { "epoch": 0.6, "grad_norm": 3.2195802788950734, "learning_rate": 3.668730958980407e-06, "loss": 0.6307, "step": 9346 }, { "epoch": 0.6, "grad_norm": 1.2264313463897925, "learning_rate": 3.667731848606413e-06, "loss": 0.5759, "step": 9347 }, { "epoch": 0.6, "grad_norm": 1.4226391179220024, "learning_rate": 3.6667327954905054e-06, "loss": 0.6507, "step": 9348 }, { "epoch": 0.6, "grad_norm": 1.582580008155869, "learning_rate": 3.665733799675619e-06, "loss": 0.6757, "step": 9349 }, { "epoch": 0.6, "grad_norm": 1.493385446649688, "learning_rate": 3.664734861204692e-06, "loss": 0.6373, "step": 9350 }, { "epoch": 0.6, "grad_norm": 1.125271307309334, "learning_rate": 3.663735980120653e-06, "loss": 0.5959, "step": 9351 }, { "epoch": 0.6, "grad_norm": 1.6588451048252795, "learning_rate": 3.662737156466434e-06, "loss": 0.6163, "step": 9352 }, { "epoch": 0.6, "grad_norm": 1.1244323960737725, "learning_rate": 3.6617383902849645e-06, "loss": 0.681, "step": 9353 }, { "epoch": 0.6, "grad_norm": 1.4864050405969333, "learning_rate": 3.6607396816191644e-06, "loss": 0.6746, "step": 9354 }, { "epoch": 0.6, "grad_norm": 1.7280332269613932, "learning_rate": 3.6597410305119605e-06, "loss": 0.689, "step": 9355 }, { "epoch": 0.6, "grad_norm": 1.3544910460832353, "learning_rate": 3.6587424370062696e-06, "loss": 0.5953, "step": 9356 }, { "epoch": 0.6, "grad_norm": 1.5877814150911946, "learning_rate": 3.6577439011450112e-06, "loss": 0.7008, "step": 9357 }, { "epoch": 0.6, "grad_norm": 1.6235985682151455, "learning_rate": 3.6567454229710973e-06, "loss": 0.5976, "step": 9358 }, { "epoch": 0.6, "grad_norm": 1.5185677242949531, "learning_rate": 3.6557470025274453e-06, "loss": 0.7309, "step": 9359 }, { "epoch": 0.6, "grad_norm": 1.7084298731421697, "learning_rate": 3.654748639856961e-06, "loss": 0.6469, "step": 9360 }, { "epoch": 0.6, "grad_norm": 1.4036551285181815, "learning_rate": 3.6537503350025525e-06, "loss": 0.7535, "step": 9361 }, { "epoch": 0.6, "grad_norm": 1.4259157033233008, "learning_rate": 3.652752088007129e-06, "loss": 0.7561, "step": 9362 }, { "epoch": 0.6, "grad_norm": 1.6523004966939698, "learning_rate": 3.6517538989135866e-06, "loss": 0.7274, "step": 9363 }, { "epoch": 0.6, "grad_norm": 1.5754379442549622, "learning_rate": 3.6507557677648297e-06, "loss": 0.6992, "step": 9364 }, { "epoch": 0.6, "grad_norm": 1.3952600358369278, "learning_rate": 3.649757694603754e-06, "loss": 0.5987, "step": 9365 }, { "epoch": 0.6, "grad_norm": 1.3860377597175622, "learning_rate": 3.6487596794732573e-06, "loss": 0.592, "step": 9366 }, { "epoch": 0.6, "grad_norm": 1.7324600789984015, "learning_rate": 3.647761722416229e-06, "loss": 0.6829, "step": 9367 }, { "epoch": 0.6, "grad_norm": 1.3609886346265714, "learning_rate": 3.646763823475561e-06, "loss": 0.6254, "step": 9368 }, { "epoch": 0.6, "grad_norm": 1.6319082343926843, "learning_rate": 3.645765982694139e-06, "loss": 0.6422, "step": 9369 }, { "epoch": 0.6, "grad_norm": 1.6792626095981726, "learning_rate": 3.6447682001148497e-06, "loss": 0.6319, "step": 9370 }, { "epoch": 0.6, "grad_norm": 1.5429103678946616, "learning_rate": 3.643770475780576e-06, "loss": 0.6498, "step": 9371 }, { "epoch": 0.6, "grad_norm": 1.414478785951931, "learning_rate": 3.6427728097341963e-06, "loss": 0.5971, "step": 9372 }, { "epoch": 0.6, "grad_norm": 1.4031369195841485, "learning_rate": 3.641775202018592e-06, "loss": 0.5832, "step": 9373 }, { "epoch": 0.6, "grad_norm": 1.661963954807902, "learning_rate": 3.640777652676633e-06, "loss": 0.7353, "step": 9374 }, { "epoch": 0.6, "grad_norm": 1.5750892750139955, "learning_rate": 3.6397801617511965e-06, "loss": 0.6852, "step": 9375 }, { "epoch": 0.6, "grad_norm": 1.4508018851687037, "learning_rate": 3.6387827292851487e-06, "loss": 0.6192, "step": 9376 }, { "epoch": 0.6, "grad_norm": 1.0528082023836967, "learning_rate": 3.6377853553213593e-06, "loss": 0.6013, "step": 9377 }, { "epoch": 0.6, "grad_norm": 1.629634863553269, "learning_rate": 3.6367880399026923e-06, "loss": 0.7383, "step": 9378 }, { "epoch": 0.6, "grad_norm": 1.4575937143824664, "learning_rate": 3.635790783072012e-06, "loss": 0.6938, "step": 9379 }, { "epoch": 0.6, "grad_norm": 1.659135327747872, "learning_rate": 3.6347935848721766e-06, "loss": 0.6699, "step": 9380 }, { "epoch": 0.6, "grad_norm": 1.6174882071528882, "learning_rate": 3.633796445346044e-06, "loss": 0.7415, "step": 9381 }, { "epoch": 0.6, "grad_norm": 1.8214906948296425, "learning_rate": 3.6327993645364704e-06, "loss": 0.7493, "step": 9382 }, { "epoch": 0.6, "grad_norm": 1.3810445566830554, "learning_rate": 3.6318023424863057e-06, "loss": 0.6598, "step": 9383 }, { "epoch": 0.6, "grad_norm": 1.5085846426938552, "learning_rate": 3.6308053792384035e-06, "loss": 0.6842, "step": 9384 }, { "epoch": 0.6, "grad_norm": 1.1645846678744358, "learning_rate": 3.6298084748356077e-06, "loss": 0.6467, "step": 9385 }, { "epoch": 0.6, "grad_norm": 2.166790765004681, "learning_rate": 3.628811629320764e-06, "loss": 0.6624, "step": 9386 }, { "epoch": 0.6, "grad_norm": 1.577458079089707, "learning_rate": 3.6278148427367154e-06, "loss": 0.7941, "step": 9387 }, { "epoch": 0.6, "grad_norm": 1.3968468450237363, "learning_rate": 3.626818115126301e-06, "loss": 0.6935, "step": 9388 }, { "epoch": 0.6, "grad_norm": 1.4997554373840658, "learning_rate": 3.6258214465323604e-06, "loss": 0.62, "step": 9389 }, { "epoch": 0.6, "grad_norm": 1.5125308389888719, "learning_rate": 3.6248248369977247e-06, "loss": 0.6178, "step": 9390 }, { "epoch": 0.6, "grad_norm": 1.4484531102928637, "learning_rate": 3.6238282865652304e-06, "loss": 0.7136, "step": 9391 }, { "epoch": 0.6, "grad_norm": 1.4488934859689957, "learning_rate": 3.622831795277702e-06, "loss": 0.7148, "step": 9392 }, { "epoch": 0.6, "grad_norm": 1.6025877379291047, "learning_rate": 3.62183536317797e-06, "loss": 0.734, "step": 9393 }, { "epoch": 0.6, "grad_norm": 1.5399513937301341, "learning_rate": 3.6208389903088578e-06, "loss": 0.7272, "step": 9394 }, { "epoch": 0.6, "grad_norm": 1.9769745168826505, "learning_rate": 3.6198426767131893e-06, "loss": 0.6762, "step": 9395 }, { "epoch": 0.6, "grad_norm": 1.613899195497143, "learning_rate": 3.6188464224337804e-06, "loss": 0.6518, "step": 9396 }, { "epoch": 0.6, "grad_norm": 1.5486878812024811, "learning_rate": 3.6178502275134507e-06, "loss": 0.6462, "step": 9397 }, { "epoch": 0.6, "grad_norm": 2.8608334740922294, "learning_rate": 3.6168540919950156e-06, "loss": 0.6894, "step": 9398 }, { "epoch": 0.6, "grad_norm": 1.868319296000686, "learning_rate": 3.6158580159212833e-06, "loss": 0.6586, "step": 9399 }, { "epoch": 0.6, "grad_norm": 1.4779437089829848, "learning_rate": 3.6148619993350653e-06, "loss": 0.6856, "step": 9400 }, { "epoch": 0.6, "grad_norm": 1.5789108860604766, "learning_rate": 3.6138660422791673e-06, "loss": 0.6162, "step": 9401 }, { "epoch": 0.6, "grad_norm": 1.4846829893420552, "learning_rate": 3.6128701447963963e-06, "loss": 0.7121, "step": 9402 }, { "epoch": 0.6, "grad_norm": 0.9868632532084507, "learning_rate": 3.6118743069295503e-06, "loss": 0.7224, "step": 9403 }, { "epoch": 0.6, "grad_norm": 1.5508923671913495, "learning_rate": 3.610878528721431e-06, "loss": 0.6742, "step": 9404 }, { "epoch": 0.6, "grad_norm": 1.4443682842675645, "learning_rate": 3.609882810214832e-06, "loss": 0.5994, "step": 9405 }, { "epoch": 0.6, "grad_norm": 1.5887235852703747, "learning_rate": 3.608887151452548e-06, "loss": 0.6346, "step": 9406 }, { "epoch": 0.6, "grad_norm": 2.2142574237243107, "learning_rate": 3.6078915524773726e-06, "loss": 0.5465, "step": 9407 }, { "epoch": 0.6, "grad_norm": 1.0809560377970315, "learning_rate": 3.6068960133320924e-06, "loss": 0.4985, "step": 9408 }, { "epoch": 0.6, "grad_norm": 1.6016094838218173, "learning_rate": 3.605900534059496e-06, "loss": 0.7005, "step": 9409 }, { "epoch": 0.6, "grad_norm": 1.7108232910624306, "learning_rate": 3.604905114702363e-06, "loss": 0.6491, "step": 9410 }, { "epoch": 0.6, "grad_norm": 1.6942533045607742, "learning_rate": 3.603909755303479e-06, "loss": 0.7079, "step": 9411 }, { "epoch": 0.6, "grad_norm": 1.5840640634810719, "learning_rate": 3.602914455905618e-06, "loss": 0.6373, "step": 9412 }, { "epoch": 0.6, "grad_norm": 1.628629656338154, "learning_rate": 3.6019192165515595e-06, "loss": 0.754, "step": 9413 }, { "epoch": 0.6, "grad_norm": 1.6470054337920026, "learning_rate": 3.600924037284073e-06, "loss": 0.5888, "step": 9414 }, { "epoch": 0.6, "grad_norm": 1.7057782699949482, "learning_rate": 3.5999289181459346e-06, "loss": 0.6455, "step": 9415 }, { "epoch": 0.6, "grad_norm": 1.53460195456567, "learning_rate": 3.5989338591799073e-06, "loss": 0.5787, "step": 9416 }, { "epoch": 0.6, "grad_norm": 1.3377709116214718, "learning_rate": 3.597938860428758e-06, "loss": 0.6099, "step": 9417 }, { "epoch": 0.6, "grad_norm": 1.703554977577356, "learning_rate": 3.596943921935253e-06, "loss": 0.6061, "step": 9418 }, { "epoch": 0.6, "grad_norm": 1.8216310743409754, "learning_rate": 3.5959490437421473e-06, "loss": 0.6022, "step": 9419 }, { "epoch": 0.6, "grad_norm": 1.7340188991283254, "learning_rate": 3.5949542258922033e-06, "loss": 0.6143, "step": 9420 }, { "epoch": 0.6, "grad_norm": 1.5935813171501347, "learning_rate": 3.5939594684281736e-06, "loss": 0.7322, "step": 9421 }, { "epoch": 0.6, "grad_norm": 1.6116588783636097, "learning_rate": 3.592964771392812e-06, "loss": 0.6356, "step": 9422 }, { "epoch": 0.6, "grad_norm": 1.4581422093162724, "learning_rate": 3.591970134828866e-06, "loss": 0.776, "step": 9423 }, { "epoch": 0.6, "grad_norm": 1.4484363605246628, "learning_rate": 3.5909755587790883e-06, "loss": 0.6279, "step": 9424 }, { "epoch": 0.6, "grad_norm": 1.5208850924738468, "learning_rate": 3.589981043286217e-06, "loss": 0.7203, "step": 9425 }, { "epoch": 0.6, "grad_norm": 1.5997516387349358, "learning_rate": 3.5889865883929986e-06, "loss": 0.5953, "step": 9426 }, { "epoch": 0.6, "grad_norm": 1.4461408300569225, "learning_rate": 3.587992194142173e-06, "loss": 0.5822, "step": 9427 }, { "epoch": 0.6, "grad_norm": 1.4871652428878703, "learning_rate": 3.5869978605764745e-06, "loss": 0.6127, "step": 9428 }, { "epoch": 0.6, "grad_norm": 1.5158310670679476, "learning_rate": 3.586003587738639e-06, "loss": 0.6122, "step": 9429 }, { "epoch": 0.6, "grad_norm": 2.50048724337139, "learning_rate": 3.585009375671398e-06, "loss": 0.6379, "step": 9430 }, { "epoch": 0.6, "grad_norm": 1.4004527066324963, "learning_rate": 3.584015224417482e-06, "loss": 0.7432, "step": 9431 }, { "epoch": 0.6, "grad_norm": 1.9567196229893964, "learning_rate": 3.583021134019614e-06, "loss": 0.7323, "step": 9432 }, { "epoch": 0.6, "grad_norm": 1.124705986213232, "learning_rate": 3.5820271045205227e-06, "loss": 0.6517, "step": 9433 }, { "epoch": 0.6, "grad_norm": 1.6077535900185391, "learning_rate": 3.5810331359629245e-06, "loss": 0.6605, "step": 9434 }, { "epoch": 0.6, "grad_norm": 1.7261341514080257, "learning_rate": 3.580039228389541e-06, "loss": 0.6157, "step": 9435 }, { "epoch": 0.6, "grad_norm": 1.568696886912907, "learning_rate": 3.5790453818430893e-06, "loss": 0.6719, "step": 9436 }, { "epoch": 0.6, "grad_norm": 2.008043623249664, "learning_rate": 3.578051596366279e-06, "loss": 0.7189, "step": 9437 }, { "epoch": 0.6, "grad_norm": 1.0471668020073446, "learning_rate": 3.5770578720018254e-06, "loss": 0.7473, "step": 9438 }, { "epoch": 0.6, "grad_norm": 1.6463775115303956, "learning_rate": 3.576064208792433e-06, "loss": 0.7046, "step": 9439 }, { "epoch": 0.6, "grad_norm": 1.7392945928620718, "learning_rate": 3.5750706067808104e-06, "loss": 0.6231, "step": 9440 }, { "epoch": 0.6, "grad_norm": 1.5699658498426088, "learning_rate": 3.574077066009657e-06, "loss": 0.7253, "step": 9441 }, { "epoch": 0.6, "grad_norm": 1.6169336523644122, "learning_rate": 3.5730835865216763e-06, "loss": 0.6657, "step": 9442 }, { "epoch": 0.6, "grad_norm": 1.6639196547614503, "learning_rate": 3.5720901683595633e-06, "loss": 0.658, "step": 9443 }, { "epoch": 0.6, "grad_norm": 1.9237268768072335, "learning_rate": 3.5710968115660145e-06, "loss": 0.6994, "step": 9444 }, { "epoch": 0.6, "grad_norm": 1.5346488258780413, "learning_rate": 3.570103516183724e-06, "loss": 0.7477, "step": 9445 }, { "epoch": 0.6, "grad_norm": 1.5516282560566432, "learning_rate": 3.569110282255378e-06, "loss": 0.6506, "step": 9446 }, { "epoch": 0.6, "grad_norm": 1.730096380527568, "learning_rate": 3.568117109823668e-06, "loss": 0.7337, "step": 9447 }, { "epoch": 0.6, "grad_norm": 1.4233151033347466, "learning_rate": 3.5671239989312726e-06, "loss": 0.6457, "step": 9448 }, { "epoch": 0.6, "grad_norm": 1.6023056962463786, "learning_rate": 3.5661309496208785e-06, "loss": 0.635, "step": 9449 }, { "epoch": 0.6, "grad_norm": 1.6363590344826915, "learning_rate": 3.5651379619351624e-06, "loss": 0.6444, "step": 9450 }, { "epoch": 0.6, "grad_norm": 1.7259689906721272, "learning_rate": 3.564145035916803e-06, "loss": 0.7443, "step": 9451 }, { "epoch": 0.6, "grad_norm": 1.2503330320546775, "learning_rate": 3.5631521716084715e-06, "loss": 0.6494, "step": 9452 }, { "epoch": 0.61, "grad_norm": 1.4013407350419873, "learning_rate": 3.56215936905284e-06, "loss": 0.6266, "step": 9453 }, { "epoch": 0.61, "grad_norm": 1.4964284870711173, "learning_rate": 3.5611666282925795e-06, "loss": 0.7026, "step": 9454 }, { "epoch": 0.61, "grad_norm": 1.568185040330955, "learning_rate": 3.5601739493703517e-06, "loss": 0.7443, "step": 9455 }, { "epoch": 0.61, "grad_norm": 1.6910825045834508, "learning_rate": 3.559181332328824e-06, "loss": 0.6347, "step": 9456 }, { "epoch": 0.61, "grad_norm": 1.3571234060706219, "learning_rate": 3.5581887772106536e-06, "loss": 0.6132, "step": 9457 }, { "epoch": 0.61, "grad_norm": 1.7432440899764259, "learning_rate": 3.5571962840585013e-06, "loss": 0.622, "step": 9458 }, { "epoch": 0.61, "grad_norm": 1.3643603781837794, "learning_rate": 3.5562038529150187e-06, "loss": 0.5672, "step": 9459 }, { "epoch": 0.61, "grad_norm": 1.5397431946989764, "learning_rate": 3.5552114838228634e-06, "loss": 0.7402, "step": 9460 }, { "epoch": 0.61, "grad_norm": 1.9403943779427528, "learning_rate": 3.554219176824681e-06, "loss": 0.6439, "step": 9461 }, { "epoch": 0.61, "grad_norm": 1.5916655370221549, "learning_rate": 3.5532269319631198e-06, "loss": 0.658, "step": 9462 }, { "epoch": 0.61, "grad_norm": 1.6649859390984734, "learning_rate": 3.5522347492808272e-06, "loss": 0.7388, "step": 9463 }, { "epoch": 0.61, "grad_norm": 1.7104686284824406, "learning_rate": 3.5512426288204404e-06, "loss": 0.7278, "step": 9464 }, { "epoch": 0.61, "grad_norm": 1.6358700249012903, "learning_rate": 3.5502505706246027e-06, "loss": 0.7139, "step": 9465 }, { "epoch": 0.61, "grad_norm": 1.5710604132530364, "learning_rate": 3.5492585747359483e-06, "loss": 0.6842, "step": 9466 }, { "epoch": 0.61, "grad_norm": 1.3014532114854076, "learning_rate": 3.5482666411971134e-06, "loss": 0.5918, "step": 9467 }, { "epoch": 0.61, "grad_norm": 1.0776262429503676, "learning_rate": 3.547274770050726e-06, "loss": 0.6112, "step": 9468 }, { "epoch": 0.61, "grad_norm": 1.8758020450951736, "learning_rate": 3.5462829613394186e-06, "loss": 0.7203, "step": 9469 }, { "epoch": 0.61, "grad_norm": 1.578129377314573, "learning_rate": 3.545291215105813e-06, "loss": 0.6376, "step": 9470 }, { "epoch": 0.61, "grad_norm": 1.5549324082089724, "learning_rate": 3.544299531392533e-06, "loss": 0.686, "step": 9471 }, { "epoch": 0.61, "grad_norm": 1.4082409684106385, "learning_rate": 3.5433079102422024e-06, "loss": 0.6353, "step": 9472 }, { "epoch": 0.61, "grad_norm": 1.6521803036165812, "learning_rate": 3.5423163516974356e-06, "loss": 0.6495, "step": 9473 }, { "epoch": 0.61, "grad_norm": 1.4471398242284008, "learning_rate": 3.5413248558008505e-06, "loss": 0.663, "step": 9474 }, { "epoch": 0.61, "grad_norm": 1.6877622905498144, "learning_rate": 3.540333422595057e-06, "loss": 0.6906, "step": 9475 }, { "epoch": 0.61, "grad_norm": 1.5883858085921174, "learning_rate": 3.5393420521226675e-06, "loss": 0.6535, "step": 9476 }, { "epoch": 0.61, "grad_norm": 1.430029566382872, "learning_rate": 3.538350744426285e-06, "loss": 0.7427, "step": 9477 }, { "epoch": 0.61, "grad_norm": 1.5798356295262328, "learning_rate": 3.5373594995485173e-06, "loss": 0.6594, "step": 9478 }, { "epoch": 0.61, "grad_norm": 1.5419152640933709, "learning_rate": 3.5363683175319637e-06, "loss": 0.6507, "step": 9479 }, { "epoch": 0.61, "grad_norm": 1.4846916398117993, "learning_rate": 3.5353771984192243e-06, "loss": 0.6595, "step": 9480 }, { "epoch": 0.61, "grad_norm": 1.1052505262122942, "learning_rate": 3.5343861422528973e-06, "loss": 0.6272, "step": 9481 }, { "epoch": 0.61, "grad_norm": 1.399439983410753, "learning_rate": 3.5333951490755723e-06, "loss": 0.595, "step": 9482 }, { "epoch": 0.61, "grad_norm": 1.719761928296368, "learning_rate": 3.5324042189298445e-06, "loss": 0.6818, "step": 9483 }, { "epoch": 0.61, "grad_norm": 1.6290678732136588, "learning_rate": 3.5314133518582972e-06, "loss": 0.6657, "step": 9484 }, { "epoch": 0.61, "grad_norm": 1.5035574105087512, "learning_rate": 3.5304225479035193e-06, "loss": 0.6715, "step": 9485 }, { "epoch": 0.61, "grad_norm": 1.636519540148349, "learning_rate": 3.5294318071080906e-06, "loss": 0.7411, "step": 9486 }, { "epoch": 0.61, "grad_norm": 1.359992012660015, "learning_rate": 3.5284411295145954e-06, "loss": 0.6416, "step": 9487 }, { "epoch": 0.61, "grad_norm": 1.701138020768113, "learning_rate": 3.527450515165606e-06, "loss": 0.6188, "step": 9488 }, { "epoch": 0.61, "grad_norm": 1.4519588800396774, "learning_rate": 3.526459964103701e-06, "loss": 0.6435, "step": 9489 }, { "epoch": 0.61, "grad_norm": 1.908254137159264, "learning_rate": 3.525469476371449e-06, "loss": 0.7679, "step": 9490 }, { "epoch": 0.61, "grad_norm": 1.6502255567965147, "learning_rate": 3.5244790520114202e-06, "loss": 0.6443, "step": 9491 }, { "epoch": 0.61, "grad_norm": 1.1398783792431508, "learning_rate": 3.5234886910661825e-06, "loss": 0.6779, "step": 9492 }, { "epoch": 0.61, "grad_norm": 1.2369315811769517, "learning_rate": 3.5224983935782975e-06, "loss": 0.637, "step": 9493 }, { "epoch": 0.61, "grad_norm": 1.7628409307551056, "learning_rate": 3.521508159590328e-06, "loss": 0.6522, "step": 9494 }, { "epoch": 0.61, "grad_norm": 1.4656199748063277, "learning_rate": 3.5205179891448294e-06, "loss": 0.6409, "step": 9495 }, { "epoch": 0.61, "grad_norm": 1.6217447731631631, "learning_rate": 3.5195278822843605e-06, "loss": 0.5951, "step": 9496 }, { "epoch": 0.61, "grad_norm": 1.3731524749626247, "learning_rate": 3.518537839051471e-06, "loss": 0.6466, "step": 9497 }, { "epoch": 0.61, "grad_norm": 1.6172009001267897, "learning_rate": 3.5175478594887125e-06, "loss": 0.6628, "step": 9498 }, { "epoch": 0.61, "grad_norm": 1.6103588818550942, "learning_rate": 3.5165579436386323e-06, "loss": 0.6404, "step": 9499 }, { "epoch": 0.61, "grad_norm": 1.5015317732620883, "learning_rate": 3.515568091543774e-06, "loss": 0.6508, "step": 9500 }, { "epoch": 0.61, "grad_norm": 1.621284901666463, "learning_rate": 3.51457830324668e-06, "loss": 0.6214, "step": 9501 }, { "epoch": 0.61, "grad_norm": 1.0113077704235969, "learning_rate": 3.5135885787898887e-06, "loss": 0.5851, "step": 9502 }, { "epoch": 0.61, "grad_norm": 1.5702854836349505, "learning_rate": 3.5125989182159393e-06, "loss": 0.713, "step": 9503 }, { "epoch": 0.61, "grad_norm": 1.8002492645181136, "learning_rate": 3.5116093215673603e-06, "loss": 0.6698, "step": 9504 }, { "epoch": 0.61, "grad_norm": 1.5316240966830421, "learning_rate": 3.5106197888866873e-06, "loss": 0.7934, "step": 9505 }, { "epoch": 0.61, "grad_norm": 1.0846554183258568, "learning_rate": 3.5096303202164437e-06, "loss": 0.606, "step": 9506 }, { "epoch": 0.61, "grad_norm": 1.7144225871772873, "learning_rate": 3.508640915599159e-06, "loss": 0.6308, "step": 9507 }, { "epoch": 0.61, "grad_norm": 1.417866164238729, "learning_rate": 3.5076515750773533e-06, "loss": 0.6219, "step": 9508 }, { "epoch": 0.61, "grad_norm": 1.457414986322741, "learning_rate": 3.506662298693546e-06, "loss": 0.6492, "step": 9509 }, { "epoch": 0.61, "grad_norm": 1.372508763253191, "learning_rate": 3.5056730864902577e-06, "loss": 0.6867, "step": 9510 }, { "epoch": 0.61, "grad_norm": 1.3323309341810745, "learning_rate": 3.5046839385099977e-06, "loss": 0.6664, "step": 9511 }, { "epoch": 0.61, "grad_norm": 1.6045643268227832, "learning_rate": 3.5036948547952824e-06, "loss": 0.5853, "step": 9512 }, { "epoch": 0.61, "grad_norm": 1.364039371178762, "learning_rate": 3.502705835388616e-06, "loss": 0.6575, "step": 9513 }, { "epoch": 0.61, "grad_norm": 1.0674806495281441, "learning_rate": 3.5017168803325076e-06, "loss": 0.6766, "step": 9514 }, { "epoch": 0.61, "grad_norm": 1.5038424737350908, "learning_rate": 3.500727989669458e-06, "loss": 0.6583, "step": 9515 }, { "epoch": 0.61, "grad_norm": 1.5401643816306398, "learning_rate": 3.499739163441971e-06, "loss": 0.6962, "step": 9516 }, { "epoch": 0.61, "grad_norm": 1.2364529665684025, "learning_rate": 3.498750401692541e-06, "loss": 0.7073, "step": 9517 }, { "epoch": 0.61, "grad_norm": 1.5829482847990382, "learning_rate": 3.4977617044636635e-06, "loss": 0.685, "step": 9518 }, { "epoch": 0.61, "grad_norm": 1.6446100344185577, "learning_rate": 3.496773071797834e-06, "loss": 0.6316, "step": 9519 }, { "epoch": 0.61, "grad_norm": 1.3257281179086804, "learning_rate": 3.495784503737536e-06, "loss": 0.588, "step": 9520 }, { "epoch": 0.61, "grad_norm": 1.6767447144353478, "learning_rate": 3.4947960003252614e-06, "loss": 0.739, "step": 9521 }, { "epoch": 0.61, "grad_norm": 1.5883025769750738, "learning_rate": 3.4938075616034903e-06, "loss": 0.6465, "step": 9522 }, { "epoch": 0.61, "grad_norm": 1.3790730811599952, "learning_rate": 3.492819187614707e-06, "loss": 0.6866, "step": 9523 }, { "epoch": 0.61, "grad_norm": 1.5876322031829644, "learning_rate": 3.4918308784013866e-06, "loss": 0.7984, "step": 9524 }, { "epoch": 0.61, "grad_norm": 1.6493445005129101, "learning_rate": 3.4908426340060075e-06, "loss": 0.6591, "step": 9525 }, { "epoch": 0.61, "grad_norm": 1.7484648655479185, "learning_rate": 3.489854454471039e-06, "loss": 0.6795, "step": 9526 }, { "epoch": 0.61, "grad_norm": 1.5135622987984265, "learning_rate": 3.488866339838953e-06, "loss": 0.6582, "step": 9527 }, { "epoch": 0.61, "grad_norm": 2.2757168687855494, "learning_rate": 3.487878290152217e-06, "loss": 0.7303, "step": 9528 }, { "epoch": 0.61, "grad_norm": 1.5294289563583618, "learning_rate": 3.4868903054532934e-06, "loss": 0.6189, "step": 9529 }, { "epoch": 0.61, "grad_norm": 1.520154926574187, "learning_rate": 3.4859023857846473e-06, "loss": 0.5702, "step": 9530 }, { "epoch": 0.61, "grad_norm": 1.8335052593339811, "learning_rate": 3.4849145311887335e-06, "loss": 0.701, "step": 9531 }, { "epoch": 0.61, "grad_norm": 1.6626919304294276, "learning_rate": 3.4839267417080113e-06, "loss": 0.6411, "step": 9532 }, { "epoch": 0.61, "grad_norm": 1.391699351417432, "learning_rate": 3.4829390173849296e-06, "loss": 0.6281, "step": 9533 }, { "epoch": 0.61, "grad_norm": 1.6004559712859203, "learning_rate": 3.481951358261942e-06, "loss": 0.8429, "step": 9534 }, { "epoch": 0.61, "grad_norm": 1.534845532486995, "learning_rate": 3.4809637643814965e-06, "loss": 0.5201, "step": 9535 }, { "epoch": 0.61, "grad_norm": 1.5980666520754978, "learning_rate": 3.4799762357860343e-06, "loss": 0.6912, "step": 9536 }, { "epoch": 0.61, "grad_norm": 1.508796473256507, "learning_rate": 3.478988772518001e-06, "loss": 0.6499, "step": 9537 }, { "epoch": 0.61, "grad_norm": 1.5346420873882791, "learning_rate": 3.4780013746198326e-06, "loss": 0.6302, "step": 9538 }, { "epoch": 0.61, "grad_norm": 1.5366806900273118, "learning_rate": 3.47701404213397e-06, "loss": 0.653, "step": 9539 }, { "epoch": 0.61, "grad_norm": 1.2549055811386252, "learning_rate": 3.476026775102841e-06, "loss": 0.6397, "step": 9540 }, { "epoch": 0.61, "grad_norm": 1.5041926132056687, "learning_rate": 3.475039573568881e-06, "loss": 0.6715, "step": 9541 }, { "epoch": 0.61, "grad_norm": 1.70221262371691, "learning_rate": 3.4740524375745133e-06, "loss": 0.6824, "step": 9542 }, { "epoch": 0.61, "grad_norm": 1.7359449321320035, "learning_rate": 3.4730653671621667e-06, "loss": 0.6479, "step": 9543 }, { "epoch": 0.61, "grad_norm": 1.619027828670267, "learning_rate": 3.472078362374261e-06, "loss": 0.6476, "step": 9544 }, { "epoch": 0.61, "grad_norm": 1.8051828458197752, "learning_rate": 3.4710914232532167e-06, "loss": 0.7173, "step": 9545 }, { "epoch": 0.61, "grad_norm": 1.5807268349355228, "learning_rate": 3.470104549841452e-06, "loss": 0.633, "step": 9546 }, { "epoch": 0.61, "grad_norm": 1.5042453866441772, "learning_rate": 3.4691177421813783e-06, "loss": 0.6009, "step": 9547 }, { "epoch": 0.61, "grad_norm": 1.6001962424846359, "learning_rate": 3.4681310003154076e-06, "loss": 0.6814, "step": 9548 }, { "epoch": 0.61, "grad_norm": 1.0468024778497982, "learning_rate": 3.4671443242859465e-06, "loss": 0.6614, "step": 9549 }, { "epoch": 0.61, "grad_norm": 1.4691048912650917, "learning_rate": 3.466157714135402e-06, "loss": 0.6484, "step": 9550 }, { "epoch": 0.61, "grad_norm": 1.5394194555406076, "learning_rate": 3.465171169906175e-06, "loss": 0.6911, "step": 9551 }, { "epoch": 0.61, "grad_norm": 1.2568333887271432, "learning_rate": 3.4641846916406685e-06, "loss": 0.6376, "step": 9552 }, { "epoch": 0.61, "grad_norm": 1.4445832470724032, "learning_rate": 3.4631982793812745e-06, "loss": 0.635, "step": 9553 }, { "epoch": 0.61, "grad_norm": 1.3525081371130436, "learning_rate": 3.4622119331703884e-06, "loss": 0.6218, "step": 9554 }, { "epoch": 0.61, "grad_norm": 1.4449463193965788, "learning_rate": 3.461225653050405e-06, "loss": 0.6981, "step": 9555 }, { "epoch": 0.61, "grad_norm": 1.4666848258911003, "learning_rate": 3.4602394390637074e-06, "loss": 0.6748, "step": 9556 }, { "epoch": 0.61, "grad_norm": 1.001186872630477, "learning_rate": 3.4592532912526845e-06, "loss": 0.7149, "step": 9557 }, { "epoch": 0.61, "grad_norm": 1.0329572384719479, "learning_rate": 3.458267209659716e-06, "loss": 0.6773, "step": 9558 }, { "epoch": 0.61, "grad_norm": 1.6993071411270044, "learning_rate": 3.4572811943271856e-06, "loss": 0.6889, "step": 9559 }, { "epoch": 0.61, "grad_norm": 1.6707536362038249, "learning_rate": 3.456295245297465e-06, "loss": 0.7141, "step": 9560 }, { "epoch": 0.61, "grad_norm": 1.895216338003045, "learning_rate": 3.4553093626129343e-06, "loss": 0.6854, "step": 9561 }, { "epoch": 0.61, "grad_norm": 1.469636424504865, "learning_rate": 3.4543235463159587e-06, "loss": 0.6359, "step": 9562 }, { "epoch": 0.61, "grad_norm": 1.7320498662209944, "learning_rate": 3.45333779644891e-06, "loss": 0.7006, "step": 9563 }, { "epoch": 0.61, "grad_norm": 1.452947685341283, "learning_rate": 3.4523521130541517e-06, "loss": 0.7076, "step": 9564 }, { "epoch": 0.61, "grad_norm": 1.1921818969775309, "learning_rate": 3.451366496174048e-06, "loss": 0.6948, "step": 9565 }, { "epoch": 0.61, "grad_norm": 1.6415546073508935, "learning_rate": 3.4503809458509596e-06, "loss": 0.7259, "step": 9566 }, { "epoch": 0.61, "grad_norm": 1.3786958567588072, "learning_rate": 3.4493954621272407e-06, "loss": 0.6223, "step": 9567 }, { "epoch": 0.61, "grad_norm": 1.922409391778751, "learning_rate": 3.448410045045248e-06, "loss": 0.6056, "step": 9568 }, { "epoch": 0.61, "grad_norm": 1.5881761823162186, "learning_rate": 3.447424694647329e-06, "loss": 0.6069, "step": 9569 }, { "epoch": 0.61, "grad_norm": 1.4797534457646873, "learning_rate": 3.446439410975836e-06, "loss": 0.6012, "step": 9570 }, { "epoch": 0.61, "grad_norm": 2.1493806638948283, "learning_rate": 3.445454194073111e-06, "loss": 0.6653, "step": 9571 }, { "epoch": 0.61, "grad_norm": 1.381764811582471, "learning_rate": 3.4444690439815005e-06, "loss": 0.6874, "step": 9572 }, { "epoch": 0.61, "grad_norm": 2.3135390762874883, "learning_rate": 3.4434839607433396e-06, "loss": 0.6125, "step": 9573 }, { "epoch": 0.61, "grad_norm": 1.5473574437770297, "learning_rate": 3.4424989444009677e-06, "loss": 0.6457, "step": 9574 }, { "epoch": 0.61, "grad_norm": 1.4531921967642618, "learning_rate": 3.4415139949967203e-06, "loss": 0.723, "step": 9575 }, { "epoch": 0.61, "grad_norm": 1.8974998588642082, "learning_rate": 3.4405291125729247e-06, "loss": 0.7534, "step": 9576 }, { "epoch": 0.61, "grad_norm": 1.519625754320171, "learning_rate": 3.439544297171913e-06, "loss": 0.689, "step": 9577 }, { "epoch": 0.61, "grad_norm": 1.2312171662933549, "learning_rate": 3.4385595488360056e-06, "loss": 0.6114, "step": 9578 }, { "epoch": 0.61, "grad_norm": 1.391017726839167, "learning_rate": 3.437574867607529e-06, "loss": 0.5631, "step": 9579 }, { "epoch": 0.61, "grad_norm": 1.2739160662579818, "learning_rate": 3.436590253528801e-06, "loss": 0.6504, "step": 9580 }, { "epoch": 0.61, "grad_norm": 1.3956086880636285, "learning_rate": 3.435605706642141e-06, "loss": 0.5802, "step": 9581 }, { "epoch": 0.61, "grad_norm": 1.3987392829670646, "learning_rate": 3.434621226989858e-06, "loss": 0.6561, "step": 9582 }, { "epoch": 0.61, "grad_norm": 1.324826244725259, "learning_rate": 3.4336368146142653e-06, "loss": 0.5777, "step": 9583 }, { "epoch": 0.61, "grad_norm": 1.507869195058597, "learning_rate": 3.4326524695576734e-06, "loss": 0.6136, "step": 9584 }, { "epoch": 0.61, "grad_norm": 1.768453909333615, "learning_rate": 3.4316681918623825e-06, "loss": 0.7031, "step": 9585 }, { "epoch": 0.61, "grad_norm": 1.3943474930951572, "learning_rate": 3.4306839815706985e-06, "loss": 0.7455, "step": 9586 }, { "epoch": 0.61, "grad_norm": 1.7345442233562711, "learning_rate": 3.4296998387249175e-06, "loss": 0.7237, "step": 9587 }, { "epoch": 0.61, "grad_norm": 1.856345668614242, "learning_rate": 3.428715763367341e-06, "loss": 0.7427, "step": 9588 }, { "epoch": 0.61, "grad_norm": 1.768336226507142, "learning_rate": 3.427731755540256e-06, "loss": 0.7448, "step": 9589 }, { "epoch": 0.61, "grad_norm": 1.7109243824570595, "learning_rate": 3.426747815285959e-06, "loss": 0.6355, "step": 9590 }, { "epoch": 0.61, "grad_norm": 1.218206966776527, "learning_rate": 3.425763942646733e-06, "loss": 0.6615, "step": 9591 }, { "epoch": 0.61, "grad_norm": 1.384063201899061, "learning_rate": 3.424780137664865e-06, "loss": 0.6262, "step": 9592 }, { "epoch": 0.61, "grad_norm": 1.630807780795899, "learning_rate": 3.423796400382637e-06, "loss": 0.6297, "step": 9593 }, { "epoch": 0.61, "grad_norm": 1.5330259698147963, "learning_rate": 3.4228127308423276e-06, "loss": 0.7302, "step": 9594 }, { "epoch": 0.61, "grad_norm": 1.8189876925540784, "learning_rate": 3.421829129086215e-06, "loss": 0.7455, "step": 9595 }, { "epoch": 0.61, "grad_norm": 1.3598649490903791, "learning_rate": 3.420845595156568e-06, "loss": 0.6195, "step": 9596 }, { "epoch": 0.61, "grad_norm": 1.5893526710537678, "learning_rate": 3.4198621290956603e-06, "loss": 0.6712, "step": 9597 }, { "epoch": 0.61, "grad_norm": 1.499472731746974, "learning_rate": 3.4188787309457565e-06, "loss": 0.7264, "step": 9598 }, { "epoch": 0.61, "grad_norm": 1.6398499009225194, "learning_rate": 3.4178954007491237e-06, "loss": 0.7011, "step": 9599 }, { "epoch": 0.61, "grad_norm": 1.4678672535238277, "learning_rate": 3.416912138548021e-06, "loss": 0.6972, "step": 9600 }, { "epoch": 0.61, "grad_norm": 1.5395220158419745, "learning_rate": 3.4159289443847077e-06, "loss": 0.6757, "step": 9601 }, { "epoch": 0.61, "grad_norm": 1.1481131752790785, "learning_rate": 3.4149458183014418e-06, "loss": 0.7516, "step": 9602 }, { "epoch": 0.61, "grad_norm": 1.342789096352883, "learning_rate": 3.4139627603404724e-06, "loss": 0.6744, "step": 9603 }, { "epoch": 0.61, "grad_norm": 1.453016202903098, "learning_rate": 3.4129797705440525e-06, "loss": 0.6053, "step": 9604 }, { "epoch": 0.61, "grad_norm": 1.6533105505833046, "learning_rate": 3.4119968489544254e-06, "loss": 0.6439, "step": 9605 }, { "epoch": 0.61, "grad_norm": 1.578054553844076, "learning_rate": 3.4110139956138377e-06, "loss": 0.6925, "step": 9606 }, { "epoch": 0.61, "grad_norm": 1.088148300724098, "learning_rate": 3.4100312105645283e-06, "loss": 0.643, "step": 9607 }, { "epoch": 0.61, "grad_norm": 1.4999877847055645, "learning_rate": 3.409048493848739e-06, "loss": 0.759, "step": 9608 }, { "epoch": 0.62, "grad_norm": 1.840005840357683, "learning_rate": 3.4080658455087e-06, "loss": 0.6407, "step": 9609 }, { "epoch": 0.62, "grad_norm": 1.4940762418237423, "learning_rate": 3.4070832655866467e-06, "loss": 0.6418, "step": 9610 }, { "epoch": 0.62, "grad_norm": 1.5197364435464762, "learning_rate": 3.4061007541248093e-06, "loss": 0.7306, "step": 9611 }, { "epoch": 0.62, "grad_norm": 1.032406718896196, "learning_rate": 3.4051183111654097e-06, "loss": 0.6693, "step": 9612 }, { "epoch": 0.62, "grad_norm": 1.325327813455807, "learning_rate": 3.404135936750674e-06, "loss": 0.6737, "step": 9613 }, { "epoch": 0.62, "grad_norm": 1.522980209155689, "learning_rate": 3.403153630922824e-06, "loss": 0.7278, "step": 9614 }, { "epoch": 0.62, "grad_norm": 1.6810934949072338, "learning_rate": 3.4021713937240748e-06, "loss": 0.6659, "step": 9615 }, { "epoch": 0.62, "grad_norm": 1.7436457862800792, "learning_rate": 3.4011892251966403e-06, "loss": 0.6986, "step": 9616 }, { "epoch": 0.62, "grad_norm": 1.7583464431020839, "learning_rate": 3.4002071253827356e-06, "loss": 0.6987, "step": 9617 }, { "epoch": 0.62, "grad_norm": 1.3116299624706904, "learning_rate": 3.3992250943245654e-06, "loss": 0.5939, "step": 9618 }, { "epoch": 0.62, "grad_norm": 1.5807995772840944, "learning_rate": 3.398243132064336e-06, "loss": 0.5407, "step": 9619 }, { "epoch": 0.62, "grad_norm": 1.7308869043906452, "learning_rate": 3.397261238644254e-06, "loss": 0.7352, "step": 9620 }, { "epoch": 0.62, "grad_norm": 1.431980547185475, "learning_rate": 3.3962794141065136e-06, "loss": 0.6184, "step": 9621 }, { "epoch": 0.62, "grad_norm": 1.7598569625185443, "learning_rate": 3.395297658493315e-06, "loss": 0.6964, "step": 9622 }, { "epoch": 0.62, "grad_norm": 2.1632959679861146, "learning_rate": 3.3943159718468503e-06, "loss": 0.7493, "step": 9623 }, { "epoch": 0.62, "grad_norm": 1.6222851675029695, "learning_rate": 3.3933343542093134e-06, "loss": 0.6506, "step": 9624 }, { "epoch": 0.62, "grad_norm": 0.983263024338955, "learning_rate": 3.392352805622888e-06, "loss": 0.6067, "step": 9625 }, { "epoch": 0.62, "grad_norm": 1.4130986573175006, "learning_rate": 3.391371326129762e-06, "loss": 0.7488, "step": 9626 }, { "epoch": 0.62, "grad_norm": 1.6645544735012099, "learning_rate": 3.3903899157721156e-06, "loss": 0.7743, "step": 9627 }, { "epoch": 0.62, "grad_norm": 1.428020822105732, "learning_rate": 3.3894085745921278e-06, "loss": 0.6217, "step": 9628 }, { "epoch": 0.62, "grad_norm": 1.4349823708063396, "learning_rate": 3.388427302631975e-06, "loss": 0.6019, "step": 9629 }, { "epoch": 0.62, "grad_norm": 1.434437625528549, "learning_rate": 3.3874460999338297e-06, "loss": 0.7127, "step": 9630 }, { "epoch": 0.62, "grad_norm": 1.4109545552314477, "learning_rate": 3.386464966539865e-06, "loss": 0.6351, "step": 9631 }, { "epoch": 0.62, "grad_norm": 1.5927961993091444, "learning_rate": 3.385483902492244e-06, "loss": 0.7413, "step": 9632 }, { "epoch": 0.62, "grad_norm": 0.9919096521069533, "learning_rate": 3.3845029078331344e-06, "loss": 0.7202, "step": 9633 }, { "epoch": 0.62, "grad_norm": 1.4996306642841342, "learning_rate": 3.383521982604693e-06, "loss": 0.6391, "step": 9634 }, { "epoch": 0.62, "grad_norm": 1.3964471648014303, "learning_rate": 3.3825411268490803e-06, "loss": 0.5381, "step": 9635 }, { "epoch": 0.62, "grad_norm": 1.2960215163025466, "learning_rate": 3.3815603406084505e-06, "loss": 0.6378, "step": 9636 }, { "epoch": 0.62, "grad_norm": 2.0612113239815075, "learning_rate": 3.380579623924959e-06, "loss": 0.8008, "step": 9637 }, { "epoch": 0.62, "grad_norm": 1.6798735027898144, "learning_rate": 3.37959897684075e-06, "loss": 0.6867, "step": 9638 }, { "epoch": 0.62, "grad_norm": 1.7563445816393273, "learning_rate": 3.378618399397972e-06, "loss": 0.6636, "step": 9639 }, { "epoch": 0.62, "grad_norm": 1.486313902789446, "learning_rate": 3.377637891638771e-06, "loss": 0.6888, "step": 9640 }, { "epoch": 0.62, "grad_norm": 1.5947512680831732, "learning_rate": 3.3766574536052808e-06, "loss": 0.6902, "step": 9641 }, { "epoch": 0.62, "grad_norm": 1.6074262810528186, "learning_rate": 3.375677085339645e-06, "loss": 0.7058, "step": 9642 }, { "epoch": 0.62, "grad_norm": 1.3540946389148383, "learning_rate": 3.374696786883992e-06, "loss": 0.6761, "step": 9643 }, { "epoch": 0.62, "grad_norm": 1.6043749980817035, "learning_rate": 3.3737165582804587e-06, "loss": 0.6914, "step": 9644 }, { "epoch": 0.62, "grad_norm": 1.3786870603744552, "learning_rate": 3.3727363995711695e-06, "loss": 0.6034, "step": 9645 }, { "epoch": 0.62, "grad_norm": 1.5930910853644569, "learning_rate": 3.371756310798251e-06, "loss": 0.6753, "step": 9646 }, { "epoch": 0.62, "grad_norm": 1.5344261146251448, "learning_rate": 3.3707762920038235e-06, "loss": 0.6215, "step": 9647 }, { "epoch": 0.62, "grad_norm": 1.5759255724254817, "learning_rate": 3.3697963432300074e-06, "loss": 0.675, "step": 9648 }, { "epoch": 0.62, "grad_norm": 1.63180732470703, "learning_rate": 3.3688164645189198e-06, "loss": 0.7288, "step": 9649 }, { "epoch": 0.62, "grad_norm": 1.752187233958405, "learning_rate": 3.3678366559126728e-06, "loss": 0.6176, "step": 9650 }, { "epoch": 0.62, "grad_norm": 1.469466876078186, "learning_rate": 3.366856917453377e-06, "loss": 0.6515, "step": 9651 }, { "epoch": 0.62, "grad_norm": 1.4323550588507508, "learning_rate": 3.365877249183138e-06, "loss": 0.6278, "step": 9652 }, { "epoch": 0.62, "grad_norm": 1.647753951415462, "learning_rate": 3.3648976511440636e-06, "loss": 0.6055, "step": 9653 }, { "epoch": 0.62, "grad_norm": 1.2689295137840333, "learning_rate": 3.3639181233782496e-06, "loss": 0.6894, "step": 9654 }, { "epoch": 0.62, "grad_norm": 1.5348288702199913, "learning_rate": 3.3629386659277984e-06, "loss": 0.5948, "step": 9655 }, { "epoch": 0.62, "grad_norm": 1.4605577974408035, "learning_rate": 3.361959278834803e-06, "loss": 0.704, "step": 9656 }, { "epoch": 0.62, "grad_norm": 2.1644644370221684, "learning_rate": 3.3609799621413554e-06, "loss": 0.6353, "step": 9657 }, { "epoch": 0.62, "grad_norm": 1.6433721801860743, "learning_rate": 3.3600007158895453e-06, "loss": 0.792, "step": 9658 }, { "epoch": 0.62, "grad_norm": 2.0038637842608447, "learning_rate": 3.359021540121457e-06, "loss": 0.612, "step": 9659 }, { "epoch": 0.62, "grad_norm": 1.6021324032521442, "learning_rate": 3.3580424348791773e-06, "loss": 0.7517, "step": 9660 }, { "epoch": 0.62, "grad_norm": 1.635697528400509, "learning_rate": 3.3570634002047815e-06, "loss": 0.7436, "step": 9661 }, { "epoch": 0.62, "grad_norm": 1.5223193716864136, "learning_rate": 3.3560844361403506e-06, "loss": 0.6188, "step": 9662 }, { "epoch": 0.62, "grad_norm": 0.9599201120915554, "learning_rate": 3.355105542727954e-06, "loss": 0.6702, "step": 9663 }, { "epoch": 0.62, "grad_norm": 1.4086994735347376, "learning_rate": 3.354126720009666e-06, "loss": 0.6606, "step": 9664 }, { "epoch": 0.62, "grad_norm": 2.6648606111397797, "learning_rate": 3.353147968027552e-06, "loss": 0.777, "step": 9665 }, { "epoch": 0.62, "grad_norm": 1.0147338913022164, "learning_rate": 3.3521692868236777e-06, "loss": 0.6265, "step": 9666 }, { "epoch": 0.62, "grad_norm": 1.3975183945638325, "learning_rate": 3.351190676440107e-06, "loss": 0.6511, "step": 9667 }, { "epoch": 0.62, "grad_norm": 1.586439586278683, "learning_rate": 3.3502121369188945e-06, "loss": 0.7334, "step": 9668 }, { "epoch": 0.62, "grad_norm": 1.6112079464478941, "learning_rate": 3.3492336683021e-06, "loss": 0.6818, "step": 9669 }, { "epoch": 0.62, "grad_norm": 1.1424007395301554, "learning_rate": 3.3482552706317705e-06, "loss": 0.6693, "step": 9670 }, { "epoch": 0.62, "grad_norm": 1.6810321971416595, "learning_rate": 3.347276943949961e-06, "loss": 0.651, "step": 9671 }, { "epoch": 0.62, "grad_norm": 1.2472344294973472, "learning_rate": 3.3462986882987137e-06, "loss": 0.6551, "step": 9672 }, { "epoch": 0.62, "grad_norm": 1.5502083643854938, "learning_rate": 3.3453205037200766e-06, "loss": 0.575, "step": 9673 }, { "epoch": 0.62, "grad_norm": 1.5211841860213409, "learning_rate": 3.3443423902560845e-06, "loss": 0.7068, "step": 9674 }, { "epoch": 0.62, "grad_norm": 1.694976683396153, "learning_rate": 3.3433643479487777e-06, "loss": 0.8524, "step": 9675 }, { "epoch": 0.62, "grad_norm": 1.4783682330099917, "learning_rate": 3.342386376840193e-06, "loss": 0.779, "step": 9676 }, { "epoch": 0.62, "grad_norm": 1.4905988179829799, "learning_rate": 3.3414084769723554e-06, "loss": 0.6675, "step": 9677 }, { "epoch": 0.62, "grad_norm": 1.5363594680695318, "learning_rate": 3.3404306483872982e-06, "loss": 0.6901, "step": 9678 }, { "epoch": 0.62, "grad_norm": 1.5009317799131152, "learning_rate": 3.339452891127043e-06, "loss": 0.6509, "step": 9679 }, { "epoch": 0.62, "grad_norm": 1.6699568506517268, "learning_rate": 3.3384752052336155e-06, "loss": 0.6933, "step": 9680 }, { "epoch": 0.62, "grad_norm": 1.489407247968859, "learning_rate": 3.33749759074903e-06, "loss": 0.6658, "step": 9681 }, { "epoch": 0.62, "grad_norm": 1.908895981341594, "learning_rate": 3.336520047715307e-06, "loss": 0.7711, "step": 9682 }, { "epoch": 0.62, "grad_norm": 1.738111743666377, "learning_rate": 3.3355425761744553e-06, "loss": 0.6903, "step": 9683 }, { "epoch": 0.62, "grad_norm": 1.5018435720583496, "learning_rate": 3.3345651761684856e-06, "loss": 0.6729, "step": 9684 }, { "epoch": 0.62, "grad_norm": 1.3898551435767599, "learning_rate": 3.3335878477394058e-06, "loss": 0.6624, "step": 9685 }, { "epoch": 0.62, "grad_norm": 1.650004504506319, "learning_rate": 3.3326105909292194e-06, "loss": 0.5883, "step": 9686 }, { "epoch": 0.62, "grad_norm": 1.6484630644109572, "learning_rate": 3.331633405779926e-06, "loss": 0.8034, "step": 9687 }, { "epoch": 0.62, "grad_norm": 1.5179187397767997, "learning_rate": 3.3306562923335218e-06, "loss": 0.6578, "step": 9688 }, { "epoch": 0.62, "grad_norm": 1.6625415751619639, "learning_rate": 3.3296792506320043e-06, "loss": 0.6021, "step": 9689 }, { "epoch": 0.62, "grad_norm": 1.5057430594096946, "learning_rate": 3.328702280717361e-06, "loss": 0.6447, "step": 9690 }, { "epoch": 0.62, "grad_norm": 1.5373741154987732, "learning_rate": 3.3277253826315824e-06, "loss": 0.655, "step": 9691 }, { "epoch": 0.62, "grad_norm": 1.583446927905271, "learning_rate": 3.3267485564166536e-06, "loss": 0.6958, "step": 9692 }, { "epoch": 0.62, "grad_norm": 1.509046073646638, "learning_rate": 3.325771802114555e-06, "loss": 0.6084, "step": 9693 }, { "epoch": 0.62, "grad_norm": 1.6173582524706172, "learning_rate": 3.3247951197672663e-06, "loss": 0.6963, "step": 9694 }, { "epoch": 0.62, "grad_norm": 1.4561533695152975, "learning_rate": 3.323818509416763e-06, "loss": 0.6839, "step": 9695 }, { "epoch": 0.62, "grad_norm": 1.2228376711588964, "learning_rate": 3.32284197110502e-06, "loss": 0.664, "step": 9696 }, { "epoch": 0.62, "grad_norm": 1.536658017955305, "learning_rate": 3.321865504874002e-06, "loss": 0.6172, "step": 9697 }, { "epoch": 0.62, "grad_norm": 1.8890460228546677, "learning_rate": 3.3208891107656817e-06, "loss": 0.7019, "step": 9698 }, { "epoch": 0.62, "grad_norm": 1.6459606941982035, "learning_rate": 3.3199127888220162e-06, "loss": 0.6484, "step": 9699 }, { "epoch": 0.62, "grad_norm": 1.222864847824162, "learning_rate": 3.31893653908497e-06, "loss": 0.7615, "step": 9700 }, { "epoch": 0.62, "grad_norm": 1.4293480779138823, "learning_rate": 3.317960361596498e-06, "loss": 0.6476, "step": 9701 }, { "epoch": 0.62, "grad_norm": 1.6455020434486698, "learning_rate": 3.3169842563985568e-06, "loss": 0.6667, "step": 9702 }, { "epoch": 0.62, "grad_norm": 1.6443901863127972, "learning_rate": 3.3160082235330937e-06, "loss": 0.6865, "step": 9703 }, { "epoch": 0.62, "grad_norm": 1.4539523997778474, "learning_rate": 3.3150322630420597e-06, "loss": 0.6445, "step": 9704 }, { "epoch": 0.62, "grad_norm": 1.286258703035481, "learning_rate": 3.3140563749673994e-06, "loss": 0.6479, "step": 9705 }, { "epoch": 0.62, "grad_norm": 1.5976979205363462, "learning_rate": 3.313080559351052e-06, "loss": 0.6391, "step": 9706 }, { "epoch": 0.62, "grad_norm": 1.393981942210728, "learning_rate": 3.3121048162349577e-06, "loss": 0.7763, "step": 9707 }, { "epoch": 0.62, "grad_norm": 1.5088120453338427, "learning_rate": 3.3111291456610517e-06, "loss": 0.6688, "step": 9708 }, { "epoch": 0.62, "grad_norm": 1.7710346586229273, "learning_rate": 3.3101535476712675e-06, "loss": 0.7466, "step": 9709 }, { "epoch": 0.62, "grad_norm": 1.5375418511921968, "learning_rate": 3.309178022307531e-06, "loss": 0.7164, "step": 9710 }, { "epoch": 0.62, "grad_norm": 2.112271269079771, "learning_rate": 3.3082025696117723e-06, "loss": 0.6976, "step": 9711 }, { "epoch": 0.62, "grad_norm": 2.133609294082444, "learning_rate": 3.30722718962591e-06, "loss": 0.7385, "step": 9712 }, { "epoch": 0.62, "grad_norm": 1.6039870633018094, "learning_rate": 3.3062518823918664e-06, "loss": 0.6989, "step": 9713 }, { "epoch": 0.62, "grad_norm": 1.5830828096981504, "learning_rate": 3.3052766479515585e-06, "loss": 0.7134, "step": 9714 }, { "epoch": 0.62, "grad_norm": 1.7778639424351395, "learning_rate": 3.3043014863468985e-06, "loss": 0.6735, "step": 9715 }, { "epoch": 0.62, "grad_norm": 1.3598510063837697, "learning_rate": 3.303326397619799e-06, "loss": 0.6532, "step": 9716 }, { "epoch": 0.62, "grad_norm": 0.9567287752946569, "learning_rate": 3.3023513818121645e-06, "loss": 0.5579, "step": 9717 }, { "epoch": 0.62, "grad_norm": 1.1540411970844582, "learning_rate": 3.301376438965902e-06, "loss": 0.6946, "step": 9718 }, { "epoch": 0.62, "grad_norm": 1.9014500869846247, "learning_rate": 3.3004015691229086e-06, "loss": 0.6579, "step": 9719 }, { "epoch": 0.62, "grad_norm": 1.2622568431127081, "learning_rate": 3.299426772325086e-06, "loss": 0.6669, "step": 9720 }, { "epoch": 0.62, "grad_norm": 1.600298807070112, "learning_rate": 3.298452048614326e-06, "loss": 0.7449, "step": 9721 }, { "epoch": 0.62, "grad_norm": 1.5247199679019368, "learning_rate": 3.2974773980325216e-06, "loss": 0.6385, "step": 9722 }, { "epoch": 0.62, "grad_norm": 1.6352875662856525, "learning_rate": 3.2965028206215627e-06, "loss": 0.6311, "step": 9723 }, { "epoch": 0.62, "grad_norm": 1.560022741966234, "learning_rate": 3.295528316423331e-06, "loss": 0.6859, "step": 9724 }, { "epoch": 0.62, "grad_norm": 1.4249905173749642, "learning_rate": 3.2945538854797134e-06, "loss": 0.6416, "step": 9725 }, { "epoch": 0.62, "grad_norm": 1.6824666099657601, "learning_rate": 3.293579527832584e-06, "loss": 0.7016, "step": 9726 }, { "epoch": 0.62, "grad_norm": 4.8663091560144105, "learning_rate": 3.292605243523821e-06, "loss": 0.6255, "step": 9727 }, { "epoch": 0.62, "grad_norm": 1.183525239031217, "learning_rate": 3.2916310325952976e-06, "loss": 0.6581, "step": 9728 }, { "epoch": 0.62, "grad_norm": 1.3712813411105058, "learning_rate": 3.2906568950888827e-06, "loss": 0.6579, "step": 9729 }, { "epoch": 0.62, "grad_norm": 1.2561977643751059, "learning_rate": 3.2896828310464412e-06, "loss": 0.5702, "step": 9730 }, { "epoch": 0.62, "grad_norm": 1.2072141358499868, "learning_rate": 3.288708840509838e-06, "loss": 0.5783, "step": 9731 }, { "epoch": 0.62, "grad_norm": 1.0279169164153936, "learning_rate": 3.2877349235209355e-06, "loss": 0.6843, "step": 9732 }, { "epoch": 0.62, "grad_norm": 1.6873721813386255, "learning_rate": 3.286761080121585e-06, "loss": 0.6814, "step": 9733 }, { "epoch": 0.62, "grad_norm": 1.588833540725588, "learning_rate": 3.285787310353646e-06, "loss": 0.7694, "step": 9734 }, { "epoch": 0.62, "grad_norm": 0.9901954805387424, "learning_rate": 3.2848136142589637e-06, "loss": 0.5373, "step": 9735 }, { "epoch": 0.62, "grad_norm": 1.1100481877823443, "learning_rate": 3.2838399918793893e-06, "loss": 0.5522, "step": 9736 }, { "epoch": 0.62, "grad_norm": 1.6017455169200627, "learning_rate": 3.282866443256765e-06, "loss": 0.6329, "step": 9737 }, { "epoch": 0.62, "grad_norm": 1.4662557942814973, "learning_rate": 3.2818929684329352e-06, "loss": 0.669, "step": 9738 }, { "epoch": 0.62, "grad_norm": 1.5174595355312839, "learning_rate": 3.280919567449733e-06, "loss": 0.6685, "step": 9739 }, { "epoch": 0.62, "grad_norm": 1.3372129414341782, "learning_rate": 3.2799462403489955e-06, "loss": 0.6466, "step": 9740 }, { "epoch": 0.62, "grad_norm": 2.014931721796082, "learning_rate": 3.278972987172556e-06, "loss": 0.7756, "step": 9741 }, { "epoch": 0.62, "grad_norm": 1.6676619946490536, "learning_rate": 3.27799980796224e-06, "loss": 0.7032, "step": 9742 }, { "epoch": 0.62, "grad_norm": 1.6089678798696478, "learning_rate": 3.277026702759874e-06, "loss": 0.7162, "step": 9743 }, { "epoch": 0.62, "grad_norm": 1.599573764817117, "learning_rate": 3.276053671607279e-06, "loss": 0.6405, "step": 9744 }, { "epoch": 0.62, "grad_norm": 1.6275939260506638, "learning_rate": 3.275080714546277e-06, "loss": 0.582, "step": 9745 }, { "epoch": 0.62, "grad_norm": 2.1201241150412344, "learning_rate": 3.274107831618679e-06, "loss": 0.6111, "step": 9746 }, { "epoch": 0.62, "grad_norm": 1.5470656802644378, "learning_rate": 3.2731350228663024e-06, "loss": 0.6486, "step": 9747 }, { "epoch": 0.62, "grad_norm": 1.3444508786426315, "learning_rate": 3.2721622883309512e-06, "loss": 0.6429, "step": 9748 }, { "epoch": 0.62, "grad_norm": 1.4108712214680166, "learning_rate": 3.2711896280544343e-06, "loss": 0.6326, "step": 9749 }, { "epoch": 0.62, "grad_norm": 1.4096241312978088, "learning_rate": 3.2702170420785558e-06, "loss": 0.6161, "step": 9750 }, { "epoch": 0.62, "grad_norm": 1.5002504477089664, "learning_rate": 3.2692445304451128e-06, "loss": 0.6968, "step": 9751 }, { "epoch": 0.62, "grad_norm": 1.414935265107785, "learning_rate": 3.2682720931959043e-06, "loss": 0.6667, "step": 9752 }, { "epoch": 0.62, "grad_norm": 1.5145509219902142, "learning_rate": 3.2672997303727217e-06, "loss": 0.677, "step": 9753 }, { "epoch": 0.62, "grad_norm": 1.4240262242798682, "learning_rate": 3.2663274420173576e-06, "loss": 0.643, "step": 9754 }, { "epoch": 0.62, "grad_norm": 1.5536890357491773, "learning_rate": 3.265355228171594e-06, "loss": 0.7062, "step": 9755 }, { "epoch": 0.62, "grad_norm": 1.783510430122966, "learning_rate": 3.26438308887722e-06, "loss": 0.625, "step": 9756 }, { "epoch": 0.62, "grad_norm": 1.5175380921297326, "learning_rate": 3.263411024176012e-06, "loss": 0.6738, "step": 9757 }, { "epoch": 0.62, "grad_norm": 1.5539371790126224, "learning_rate": 3.262439034109749e-06, "loss": 0.6202, "step": 9758 }, { "epoch": 0.62, "grad_norm": 1.6710172898456912, "learning_rate": 3.2614671187202075e-06, "loss": 0.7312, "step": 9759 }, { "epoch": 0.62, "grad_norm": 1.5385131942857215, "learning_rate": 3.2604952780491537e-06, "loss": 0.6892, "step": 9760 }, { "epoch": 0.62, "grad_norm": 1.4621996544550258, "learning_rate": 3.2595235121383608e-06, "loss": 0.6303, "step": 9761 }, { "epoch": 0.62, "grad_norm": 1.3283826704184254, "learning_rate": 3.2585518210295873e-06, "loss": 0.6139, "step": 9762 }, { "epoch": 0.62, "grad_norm": 1.1855484380641714, "learning_rate": 3.2575802047645977e-06, "loss": 0.6623, "step": 9763 }, { "epoch": 0.62, "grad_norm": 1.045649110531631, "learning_rate": 3.2566086633851498e-06, "loss": 0.6051, "step": 9764 }, { "epoch": 0.63, "grad_norm": 1.183512363391031, "learning_rate": 3.2556371969329992e-06, "loss": 0.7119, "step": 9765 }, { "epoch": 0.63, "grad_norm": 1.4460057429112279, "learning_rate": 3.2546658054498947e-06, "loss": 0.661, "step": 9766 }, { "epoch": 0.63, "grad_norm": 1.5382628201372355, "learning_rate": 3.2536944889775868e-06, "loss": 0.7295, "step": 9767 }, { "epoch": 0.63, "grad_norm": 1.5512510552426235, "learning_rate": 3.252723247557822e-06, "loss": 0.5613, "step": 9768 }, { "epoch": 0.63, "grad_norm": 1.505896664502305, "learning_rate": 3.2517520812323382e-06, "loss": 0.553, "step": 9769 }, { "epoch": 0.63, "grad_norm": 1.5570169840464305, "learning_rate": 3.2507809900428786e-06, "loss": 0.6556, "step": 9770 }, { "epoch": 0.63, "grad_norm": 1.5422331153610316, "learning_rate": 3.2498099740311752e-06, "loss": 0.714, "step": 9771 }, { "epoch": 0.63, "grad_norm": 1.4934138266723544, "learning_rate": 3.2488390332389613e-06, "loss": 0.6731, "step": 9772 }, { "epoch": 0.63, "grad_norm": 1.5364685361845027, "learning_rate": 3.2478681677079655e-06, "loss": 0.6927, "step": 9773 }, { "epoch": 0.63, "grad_norm": 1.4318543562856831, "learning_rate": 3.246897377479916e-06, "loss": 0.7457, "step": 9774 }, { "epoch": 0.63, "grad_norm": 1.5342154677149675, "learning_rate": 3.2459266625965315e-06, "loss": 0.7602, "step": 9775 }, { "epoch": 0.63, "grad_norm": 1.4973350264369143, "learning_rate": 3.2449560230995354e-06, "loss": 0.7254, "step": 9776 }, { "epoch": 0.63, "grad_norm": 1.5602376135350704, "learning_rate": 3.24398545903064e-06, "loss": 0.7596, "step": 9777 }, { "epoch": 0.63, "grad_norm": 1.4861189323323711, "learning_rate": 3.243014970431558e-06, "loss": 0.674, "step": 9778 }, { "epoch": 0.63, "grad_norm": 1.1299421038688002, "learning_rate": 3.2420445573440027e-06, "loss": 0.7451, "step": 9779 }, { "epoch": 0.63, "grad_norm": 1.0428119383817092, "learning_rate": 3.241074219809678e-06, "loss": 0.6095, "step": 9780 }, { "epoch": 0.63, "grad_norm": 1.684050952608692, "learning_rate": 3.2401039578702886e-06, "loss": 0.5872, "step": 9781 }, { "epoch": 0.63, "grad_norm": 1.5215051627817702, "learning_rate": 3.2391337715675314e-06, "loss": 0.71, "step": 9782 }, { "epoch": 0.63, "grad_norm": 15.843328180725885, "learning_rate": 3.2381636609431065e-06, "loss": 0.684, "step": 9783 }, { "epoch": 0.63, "grad_norm": 1.6789580820927856, "learning_rate": 3.2371936260387027e-06, "loss": 0.6515, "step": 9784 }, { "epoch": 0.63, "grad_norm": 1.4573594182764464, "learning_rate": 3.2362236668960144e-06, "loss": 0.6775, "step": 9785 }, { "epoch": 0.63, "grad_norm": 1.2782758430624337, "learning_rate": 3.2352537835567255e-06, "loss": 0.5965, "step": 9786 }, { "epoch": 0.63, "grad_norm": 1.6456678982165458, "learning_rate": 3.234283976062521e-06, "loss": 0.7521, "step": 9787 }, { "epoch": 0.63, "grad_norm": 1.6353092292876668, "learning_rate": 3.2333142444550825e-06, "loss": 0.7262, "step": 9788 }, { "epoch": 0.63, "grad_norm": 1.5785749695185782, "learning_rate": 3.2323445887760837e-06, "loss": 0.7311, "step": 9789 }, { "epoch": 0.63, "grad_norm": 1.493489044355371, "learning_rate": 3.231375009067202e-06, "loss": 0.6936, "step": 9790 }, { "epoch": 0.63, "grad_norm": 1.4266558077642302, "learning_rate": 3.230405505370104e-06, "loss": 0.6872, "step": 9791 }, { "epoch": 0.63, "grad_norm": 1.6065574878315159, "learning_rate": 3.2294360777264598e-06, "loss": 0.7152, "step": 9792 }, { "epoch": 0.63, "grad_norm": 1.413273924829305, "learning_rate": 3.228466726177932e-06, "loss": 0.6397, "step": 9793 }, { "epoch": 0.63, "grad_norm": 1.444274410482709, "learning_rate": 3.227497450766184e-06, "loss": 0.5947, "step": 9794 }, { "epoch": 0.63, "grad_norm": 1.619168439546705, "learning_rate": 3.2265282515328676e-06, "loss": 0.5355, "step": 9795 }, { "epoch": 0.63, "grad_norm": 1.5219725874363461, "learning_rate": 3.225559128519642e-06, "loss": 0.7042, "step": 9796 }, { "epoch": 0.63, "grad_norm": 1.7548335892580922, "learning_rate": 3.2245900817681576e-06, "loss": 0.6818, "step": 9797 }, { "epoch": 0.63, "grad_norm": 1.8491106894949678, "learning_rate": 3.223621111320059e-06, "loss": 0.6717, "step": 9798 }, { "epoch": 0.63, "grad_norm": 0.9762553095341939, "learning_rate": 3.2226522172169928e-06, "loss": 0.6868, "step": 9799 }, { "epoch": 0.63, "grad_norm": 1.570423805083455, "learning_rate": 3.221683399500599e-06, "loss": 0.7138, "step": 9800 }, { "epoch": 0.63, "grad_norm": 1.9474624978122004, "learning_rate": 3.220714658212518e-06, "loss": 0.5907, "step": 9801 }, { "epoch": 0.63, "grad_norm": 1.698496831795288, "learning_rate": 3.219745993394381e-06, "loss": 0.688, "step": 9802 }, { "epoch": 0.63, "grad_norm": 1.4962323143346616, "learning_rate": 3.2187774050878213e-06, "loss": 0.6362, "step": 9803 }, { "epoch": 0.63, "grad_norm": 1.078257061718514, "learning_rate": 3.2178088933344644e-06, "loss": 0.5854, "step": 9804 }, { "epoch": 0.63, "grad_norm": 1.6620625017247435, "learning_rate": 3.2168404581759362e-06, "loss": 0.7258, "step": 9805 }, { "epoch": 0.63, "grad_norm": 1.40746371779317, "learning_rate": 3.215872099653859e-06, "loss": 0.6638, "step": 9806 }, { "epoch": 0.63, "grad_norm": 1.3269874403483115, "learning_rate": 3.21490381780985e-06, "loss": 0.5875, "step": 9807 }, { "epoch": 0.63, "grad_norm": 1.5106064674557478, "learning_rate": 3.2139356126855235e-06, "loss": 0.6249, "step": 9808 }, { "epoch": 0.63, "grad_norm": 1.5389478855743446, "learning_rate": 3.212967484322491e-06, "loss": 0.6563, "step": 9809 }, { "epoch": 0.63, "grad_norm": 1.4590012183928007, "learning_rate": 3.211999432762363e-06, "loss": 0.6663, "step": 9810 }, { "epoch": 0.63, "grad_norm": 1.3804725316928932, "learning_rate": 3.2110314580467404e-06, "loss": 0.6431, "step": 9811 }, { "epoch": 0.63, "grad_norm": 1.4470770411820824, "learning_rate": 3.210063560217228e-06, "loss": 0.5826, "step": 9812 }, { "epoch": 0.63, "grad_norm": 1.519642696880388, "learning_rate": 3.209095739315421e-06, "loss": 0.7233, "step": 9813 }, { "epoch": 0.63, "grad_norm": 1.8160337394279222, "learning_rate": 3.208127995382916e-06, "loss": 0.6499, "step": 9814 }, { "epoch": 0.63, "grad_norm": 1.5902066626971278, "learning_rate": 3.2071603284613052e-06, "loss": 0.7219, "step": 9815 }, { "epoch": 0.63, "grad_norm": 1.8202276946603886, "learning_rate": 3.2061927385921756e-06, "loss": 0.6508, "step": 9816 }, { "epoch": 0.63, "grad_norm": 1.1321772155241783, "learning_rate": 3.2052252258171142e-06, "loss": 0.6644, "step": 9817 }, { "epoch": 0.63, "grad_norm": 1.5318692098623048, "learning_rate": 3.2042577901776994e-06, "loss": 0.6153, "step": 9818 }, { "epoch": 0.63, "grad_norm": 1.5180523245202098, "learning_rate": 3.2032904317155133e-06, "loss": 0.6374, "step": 9819 }, { "epoch": 0.63, "grad_norm": 0.9303937422690529, "learning_rate": 3.2023231504721274e-06, "loss": 0.549, "step": 9820 }, { "epoch": 0.63, "grad_norm": 1.601302969379586, "learning_rate": 3.2013559464891162e-06, "loss": 0.6549, "step": 9821 }, { "epoch": 0.63, "grad_norm": 1.6393925768731996, "learning_rate": 3.200388819808046e-06, "loss": 0.6752, "step": 9822 }, { "epoch": 0.63, "grad_norm": 1.9029484635753124, "learning_rate": 3.199421770470482e-06, "loss": 0.6391, "step": 9823 }, { "epoch": 0.63, "grad_norm": 1.5139579558803573, "learning_rate": 3.198454798517989e-06, "loss": 0.7178, "step": 9824 }, { "epoch": 0.63, "grad_norm": 1.3299308640806318, "learning_rate": 3.1974879039921214e-06, "loss": 0.6529, "step": 9825 }, { "epoch": 0.63, "grad_norm": 1.3706433913360214, "learning_rate": 3.1965210869344385e-06, "loss": 0.7072, "step": 9826 }, { "epoch": 0.63, "grad_norm": 0.9836252879714068, "learning_rate": 3.1955543473864868e-06, "loss": 0.6155, "step": 9827 }, { "epoch": 0.63, "grad_norm": 1.0533372953533897, "learning_rate": 3.1945876853898194e-06, "loss": 0.6472, "step": 9828 }, { "epoch": 0.63, "grad_norm": 1.4418143918422992, "learning_rate": 3.1936211009859786e-06, "loss": 0.6339, "step": 9829 }, { "epoch": 0.63, "grad_norm": 1.6187288636271004, "learning_rate": 3.192654594216509e-06, "loss": 0.6677, "step": 9830 }, { "epoch": 0.63, "grad_norm": 1.5440040242111914, "learning_rate": 3.1916881651229447e-06, "loss": 0.638, "step": 9831 }, { "epoch": 0.63, "grad_norm": 1.6638050634306725, "learning_rate": 3.190721813746824e-06, "loss": 0.6345, "step": 9832 }, { "epoch": 0.63, "grad_norm": 1.3486602325246553, "learning_rate": 3.189755540129679e-06, "loss": 0.6989, "step": 9833 }, { "epoch": 0.63, "grad_norm": 1.5832955922473342, "learning_rate": 3.1887893443130356e-06, "loss": 0.6361, "step": 9834 }, { "epoch": 0.63, "grad_norm": 1.4263231635856501, "learning_rate": 3.1878232263384214e-06, "loss": 0.6315, "step": 9835 }, { "epoch": 0.63, "grad_norm": 1.5391603041263, "learning_rate": 3.1868571862473563e-06, "loss": 0.6577, "step": 9836 }, { "epoch": 0.63, "grad_norm": 1.3617224797251173, "learning_rate": 3.1858912240813607e-06, "loss": 0.6975, "step": 9837 }, { "epoch": 0.63, "grad_norm": 2.05489093932231, "learning_rate": 3.1849253398819467e-06, "loss": 0.6811, "step": 9838 }, { "epoch": 0.63, "grad_norm": 1.8578315171470647, "learning_rate": 3.183959533690629e-06, "loss": 0.7937, "step": 9839 }, { "epoch": 0.63, "grad_norm": 1.605854900660592, "learning_rate": 3.1829938055489135e-06, "loss": 0.7432, "step": 9840 }, { "epoch": 0.63, "grad_norm": 1.549554839064906, "learning_rate": 3.1820281554983056e-06, "loss": 0.6409, "step": 9841 }, { "epoch": 0.63, "grad_norm": 1.0977160994621167, "learning_rate": 3.181062583580309e-06, "loss": 0.6894, "step": 9842 }, { "epoch": 0.63, "grad_norm": 1.66291222299345, "learning_rate": 3.1800970898364194e-06, "loss": 0.6529, "step": 9843 }, { "epoch": 0.63, "grad_norm": 1.476896390673199, "learning_rate": 3.1791316743081333e-06, "loss": 0.6986, "step": 9844 }, { "epoch": 0.63, "grad_norm": 1.7727835465736337, "learning_rate": 3.178166337036941e-06, "loss": 0.7127, "step": 9845 }, { "epoch": 0.63, "grad_norm": 1.5174931992262206, "learning_rate": 3.177201078064333e-06, "loss": 0.6404, "step": 9846 }, { "epoch": 0.63, "grad_norm": 1.6283144286159756, "learning_rate": 3.176235897431791e-06, "loss": 0.7051, "step": 9847 }, { "epoch": 0.63, "grad_norm": 1.6225685647770867, "learning_rate": 3.175270795180799e-06, "loss": 0.7396, "step": 9848 }, { "epoch": 0.63, "grad_norm": 1.5156134792976825, "learning_rate": 3.1743057713528325e-06, "loss": 0.6804, "step": 9849 }, { "epoch": 0.63, "grad_norm": 1.6685026414014639, "learning_rate": 3.1733408259893683e-06, "loss": 0.6544, "step": 9850 }, { "epoch": 0.63, "grad_norm": 1.5118255428311949, "learning_rate": 3.172375959131876e-06, "loss": 0.7219, "step": 9851 }, { "epoch": 0.63, "grad_norm": 1.4089972123002596, "learning_rate": 3.1714111708218247e-06, "loss": 0.6173, "step": 9852 }, { "epoch": 0.63, "grad_norm": 1.6669282752925605, "learning_rate": 3.1704464611006813e-06, "loss": 0.6079, "step": 9853 }, { "epoch": 0.63, "grad_norm": 0.9913418137729485, "learning_rate": 3.169481830009902e-06, "loss": 0.6844, "step": 9854 }, { "epoch": 0.63, "grad_norm": 1.1064073734153341, "learning_rate": 3.16851727759095e-06, "loss": 0.6513, "step": 9855 }, { "epoch": 0.63, "grad_norm": 1.4706955638344943, "learning_rate": 3.1675528038852743e-06, "loss": 0.6976, "step": 9856 }, { "epoch": 0.63, "grad_norm": 1.489012560131206, "learning_rate": 3.1665884089343296e-06, "loss": 0.6386, "step": 9857 }, { "epoch": 0.63, "grad_norm": 2.959037805675465, "learning_rate": 3.1656240927795617e-06, "loss": 0.6558, "step": 9858 }, { "epoch": 0.63, "grad_norm": 1.5859325489431506, "learning_rate": 3.1646598554624174e-06, "loss": 0.6515, "step": 9859 }, { "epoch": 0.63, "grad_norm": 1.471681483622446, "learning_rate": 3.1636956970243347e-06, "loss": 0.7669, "step": 9860 }, { "epoch": 0.63, "grad_norm": 1.6293848775557855, "learning_rate": 3.1627316175067515e-06, "loss": 0.6392, "step": 9861 }, { "epoch": 0.63, "grad_norm": 1.6029224076512651, "learning_rate": 3.161767616951105e-06, "loss": 0.6382, "step": 9862 }, { "epoch": 0.63, "grad_norm": 1.4857550064010412, "learning_rate": 3.160803695398822e-06, "loss": 0.6493, "step": 9863 }, { "epoch": 0.63, "grad_norm": 1.584074857584857, "learning_rate": 3.1598398528913323e-06, "loss": 0.7808, "step": 9864 }, { "epoch": 0.63, "grad_norm": 1.8122645226201006, "learning_rate": 3.1588760894700575e-06, "loss": 0.6813, "step": 9865 }, { "epoch": 0.63, "grad_norm": 1.473402258472466, "learning_rate": 3.1579124051764224e-06, "loss": 0.6081, "step": 9866 }, { "epoch": 0.63, "grad_norm": 1.3578224442844125, "learning_rate": 3.156948800051839e-06, "loss": 0.6539, "step": 9867 }, { "epoch": 0.63, "grad_norm": 1.5733286733038574, "learning_rate": 3.155985274137726e-06, "loss": 0.6511, "step": 9868 }, { "epoch": 0.63, "grad_norm": 1.0697053373283814, "learning_rate": 3.1550218274754887e-06, "loss": 0.591, "step": 9869 }, { "epoch": 0.63, "grad_norm": 1.9607504633458472, "learning_rate": 3.154058460106536e-06, "loss": 0.6535, "step": 9870 }, { "epoch": 0.63, "grad_norm": 1.526173733009808, "learning_rate": 3.153095172072273e-06, "loss": 0.611, "step": 9871 }, { "epoch": 0.63, "grad_norm": 2.4640177555321876, "learning_rate": 3.1521319634140978e-06, "loss": 0.5901, "step": 9872 }, { "epoch": 0.63, "grad_norm": 1.1708208955036323, "learning_rate": 3.15116883417341e-06, "loss": 0.5944, "step": 9873 }, { "epoch": 0.63, "grad_norm": 1.4944809620362522, "learning_rate": 3.1502057843915983e-06, "loss": 0.7152, "step": 9874 }, { "epoch": 0.63, "grad_norm": 1.715566577890559, "learning_rate": 3.1492428141100575e-06, "loss": 0.7641, "step": 9875 }, { "epoch": 0.63, "grad_norm": 1.6531015209576245, "learning_rate": 3.1482799233701696e-06, "loss": 0.6289, "step": 9876 }, { "epoch": 0.63, "grad_norm": 1.6292436344030026, "learning_rate": 3.1473171122133207e-06, "loss": 0.7399, "step": 9877 }, { "epoch": 0.63, "grad_norm": 1.5559354970022228, "learning_rate": 3.1463543806808876e-06, "loss": 0.6935, "step": 9878 }, { "epoch": 0.63, "grad_norm": 1.3721338122046352, "learning_rate": 3.14539172881425e-06, "loss": 0.6836, "step": 9879 }, { "epoch": 0.63, "grad_norm": 1.468771127386484, "learning_rate": 3.144429156654778e-06, "loss": 0.6275, "step": 9880 }, { "epoch": 0.63, "grad_norm": 1.3703609883298546, "learning_rate": 3.1434666642438416e-06, "loss": 0.6966, "step": 9881 }, { "epoch": 0.63, "grad_norm": 1.5528443057052652, "learning_rate": 3.1425042516228083e-06, "loss": 0.6111, "step": 9882 }, { "epoch": 0.63, "grad_norm": 1.4341760556064906, "learning_rate": 3.141541918833037e-06, "loss": 0.6278, "step": 9883 }, { "epoch": 0.63, "grad_norm": 1.833813719175191, "learning_rate": 3.1405796659158907e-06, "loss": 0.6945, "step": 9884 }, { "epoch": 0.63, "grad_norm": 1.5053582613668093, "learning_rate": 3.139617492912722e-06, "loss": 0.6725, "step": 9885 }, { "epoch": 0.63, "grad_norm": 2.0434972621291982, "learning_rate": 3.138655399864885e-06, "loss": 0.7127, "step": 9886 }, { "epoch": 0.63, "grad_norm": 1.464827697791511, "learning_rate": 3.1376933868137267e-06, "loss": 0.6639, "step": 9887 }, { "epoch": 0.63, "grad_norm": 1.7603885537114328, "learning_rate": 3.136731453800593e-06, "loss": 0.6986, "step": 9888 }, { "epoch": 0.63, "grad_norm": 1.462092992089482, "learning_rate": 3.1357696008668285e-06, "loss": 0.6436, "step": 9889 }, { "epoch": 0.63, "grad_norm": 1.4459558418501706, "learning_rate": 3.1348078280537676e-06, "loss": 0.6476, "step": 9890 }, { "epoch": 0.63, "grad_norm": 3.788281879772587, "learning_rate": 3.133846135402748e-06, "loss": 0.6412, "step": 9891 }, { "epoch": 0.63, "grad_norm": 1.5806497697193926, "learning_rate": 3.1328845229550988e-06, "loss": 0.7399, "step": 9892 }, { "epoch": 0.63, "grad_norm": 1.505131115241346, "learning_rate": 3.1319229907521502e-06, "loss": 0.7186, "step": 9893 }, { "epoch": 0.63, "grad_norm": 1.4555129857140054, "learning_rate": 3.1309615388352255e-06, "loss": 0.6275, "step": 9894 }, { "epoch": 0.63, "grad_norm": 1.5164753408536193, "learning_rate": 3.130000167245648e-06, "loss": 0.6515, "step": 9895 }, { "epoch": 0.63, "grad_norm": 1.6506294151727643, "learning_rate": 3.129038876024732e-06, "loss": 0.6455, "step": 9896 }, { "epoch": 0.63, "grad_norm": 1.651926752167865, "learning_rate": 3.128077665213794e-06, "loss": 0.691, "step": 9897 }, { "epoch": 0.63, "grad_norm": 1.6083246947360539, "learning_rate": 3.1271165348541465e-06, "loss": 0.7209, "step": 9898 }, { "epoch": 0.63, "grad_norm": 1.192661848219767, "learning_rate": 3.1261554849870925e-06, "loss": 0.6054, "step": 9899 }, { "epoch": 0.63, "grad_norm": 1.493500180527126, "learning_rate": 3.1251945156539394e-06, "loss": 0.6174, "step": 9900 }, { "epoch": 0.63, "grad_norm": 1.6659105447690432, "learning_rate": 3.124233626895985e-06, "loss": 0.6868, "step": 9901 }, { "epoch": 0.63, "grad_norm": 1.5788625960394918, "learning_rate": 3.1232728187545303e-06, "loss": 0.6532, "step": 9902 }, { "epoch": 0.63, "grad_norm": 1.7215617018309015, "learning_rate": 3.1223120912708647e-06, "loss": 0.8181, "step": 9903 }, { "epoch": 0.63, "grad_norm": 1.6694758335671485, "learning_rate": 3.1213514444862815e-06, "loss": 0.6338, "step": 9904 }, { "epoch": 0.63, "grad_norm": 1.5513906468052505, "learning_rate": 3.1203908784420635e-06, "loss": 0.6854, "step": 9905 }, { "epoch": 0.63, "grad_norm": 1.6674933778263665, "learning_rate": 3.1194303931794958e-06, "loss": 0.6897, "step": 9906 }, { "epoch": 0.63, "grad_norm": 1.702027257075494, "learning_rate": 3.1184699887398594e-06, "loss": 0.7204, "step": 9907 }, { "epoch": 0.63, "grad_norm": 1.4822543034384008, "learning_rate": 3.117509665164428e-06, "loss": 0.7395, "step": 9908 }, { "epoch": 0.63, "grad_norm": 1.4445468471335046, "learning_rate": 3.116549422494478e-06, "loss": 0.722, "step": 9909 }, { "epoch": 0.63, "grad_norm": 1.4798453164199186, "learning_rate": 3.115589260771274e-06, "loss": 0.6575, "step": 9910 }, { "epoch": 0.63, "grad_norm": 1.489870153396975, "learning_rate": 3.1146291800360863e-06, "loss": 0.7077, "step": 9911 }, { "epoch": 0.63, "grad_norm": 1.4802543805109518, "learning_rate": 3.1136691803301717e-06, "loss": 0.6526, "step": 9912 }, { "epoch": 0.63, "grad_norm": 1.4044302575081598, "learning_rate": 3.1127092616947935e-06, "loss": 0.675, "step": 9913 }, { "epoch": 0.63, "grad_norm": 1.5352684849498932, "learning_rate": 3.1117494241712043e-06, "loss": 0.6779, "step": 9914 }, { "epoch": 0.63, "grad_norm": 1.553546835762093, "learning_rate": 3.1107896678006577e-06, "loss": 0.6995, "step": 9915 }, { "epoch": 0.63, "grad_norm": 1.5424219750144696, "learning_rate": 3.109829992624403e-06, "loss": 0.6269, "step": 9916 }, { "epoch": 0.63, "grad_norm": 1.5575980079919893, "learning_rate": 3.108870398683681e-06, "loss": 0.6998, "step": 9917 }, { "epoch": 0.63, "grad_norm": 1.4713626205449726, "learning_rate": 3.1079108860197382e-06, "loss": 0.6462, "step": 9918 }, { "epoch": 0.63, "grad_norm": 1.6554677166282448, "learning_rate": 3.106951454673808e-06, "loss": 0.6487, "step": 9919 }, { "epoch": 0.63, "grad_norm": 1.4930739350163968, "learning_rate": 3.105992104687126e-06, "loss": 0.6398, "step": 9920 }, { "epoch": 0.64, "grad_norm": 1.2996861156370252, "learning_rate": 3.105032836100925e-06, "loss": 0.6735, "step": 9921 }, { "epoch": 0.64, "grad_norm": 1.5323340799615366, "learning_rate": 3.1040736489564296e-06, "loss": 0.6491, "step": 9922 }, { "epoch": 0.64, "grad_norm": 1.5122312421156243, "learning_rate": 3.1031145432948644e-06, "loss": 0.694, "step": 9923 }, { "epoch": 0.64, "grad_norm": 1.5018737799117332, "learning_rate": 3.1021555191574527e-06, "loss": 0.7172, "step": 9924 }, { "epoch": 0.64, "grad_norm": 1.529565660712865, "learning_rate": 3.1011965765854056e-06, "loss": 0.7079, "step": 9925 }, { "epoch": 0.64, "grad_norm": 1.4498422857985935, "learning_rate": 3.100237715619941e-06, "loss": 0.688, "step": 9926 }, { "epoch": 0.64, "grad_norm": 1.4451218594492081, "learning_rate": 3.099278936302269e-06, "loss": 0.6602, "step": 9927 }, { "epoch": 0.64, "grad_norm": 1.4642292256968663, "learning_rate": 3.0983202386735923e-06, "loss": 0.6559, "step": 9928 }, { "epoch": 0.64, "grad_norm": 1.5464090470754375, "learning_rate": 3.097361622775117e-06, "loss": 0.6801, "step": 9929 }, { "epoch": 0.64, "grad_norm": 1.6642508938937404, "learning_rate": 3.096403088648039e-06, "loss": 0.6134, "step": 9930 }, { "epoch": 0.64, "grad_norm": 1.4538076256142973, "learning_rate": 3.095444636333559e-06, "loss": 0.7603, "step": 9931 }, { "epoch": 0.64, "grad_norm": 1.5050444985263363, "learning_rate": 3.094486265872865e-06, "loss": 0.6353, "step": 9932 }, { "epoch": 0.64, "grad_norm": 1.5822570811351746, "learning_rate": 3.0935279773071492e-06, "loss": 0.6645, "step": 9933 }, { "epoch": 0.64, "grad_norm": 1.6051461700022174, "learning_rate": 3.0925697706775926e-06, "loss": 0.6774, "step": 9934 }, { "epoch": 0.64, "grad_norm": 1.5325803762189625, "learning_rate": 3.0916116460253787e-06, "loss": 0.6233, "step": 9935 }, { "epoch": 0.64, "grad_norm": 1.6780923735418274, "learning_rate": 3.0906536033916878e-06, "loss": 0.6792, "step": 9936 }, { "epoch": 0.64, "grad_norm": 1.0500059639888282, "learning_rate": 3.0896956428176916e-06, "loss": 0.6102, "step": 9937 }, { "epoch": 0.64, "grad_norm": 1.671143741769896, "learning_rate": 3.088737764344565e-06, "loss": 0.6298, "step": 9938 }, { "epoch": 0.64, "grad_norm": 1.7370423123682255, "learning_rate": 3.0877799680134713e-06, "loss": 0.701, "step": 9939 }, { "epoch": 0.64, "grad_norm": 1.825689673995127, "learning_rate": 3.086822253865578e-06, "loss": 0.5947, "step": 9940 }, { "epoch": 0.64, "grad_norm": 1.6535721819206102, "learning_rate": 3.085864621942042e-06, "loss": 0.6778, "step": 9941 }, { "epoch": 0.64, "grad_norm": 1.6769632762896896, "learning_rate": 3.0849070722840234e-06, "loss": 0.718, "step": 9942 }, { "epoch": 0.64, "grad_norm": 1.7788380357280373, "learning_rate": 3.0839496049326745e-06, "loss": 0.6982, "step": 9943 }, { "epoch": 0.64, "grad_norm": 1.200510094278942, "learning_rate": 3.082992219929144e-06, "loss": 0.6732, "step": 9944 }, { "epoch": 0.64, "grad_norm": 1.5355913426380525, "learning_rate": 3.0820349173145823e-06, "loss": 0.669, "step": 9945 }, { "epoch": 0.64, "grad_norm": 1.6607292107351812, "learning_rate": 3.0810776971301283e-06, "loss": 0.7481, "step": 9946 }, { "epoch": 0.64, "grad_norm": 1.6798959374115354, "learning_rate": 3.080120559416924e-06, "loss": 0.8226, "step": 9947 }, { "epoch": 0.64, "grad_norm": 1.5405076833731848, "learning_rate": 3.079163504216102e-06, "loss": 0.7091, "step": 9948 }, { "epoch": 0.64, "grad_norm": 1.5259941176841365, "learning_rate": 3.0782065315687975e-06, "loss": 0.7107, "step": 9949 }, { "epoch": 0.64, "grad_norm": 1.4811335492387898, "learning_rate": 3.077249641516137e-06, "loss": 0.644, "step": 9950 }, { "epoch": 0.64, "grad_norm": 1.3527903088813147, "learning_rate": 3.0762928340992483e-06, "loss": 0.7101, "step": 9951 }, { "epoch": 0.64, "grad_norm": 1.6792594053298668, "learning_rate": 3.075336109359249e-06, "loss": 0.6729, "step": 9952 }, { "epoch": 0.64, "grad_norm": 1.8374756666117833, "learning_rate": 3.0743794673372605e-06, "loss": 0.6372, "step": 9953 }, { "epoch": 0.64, "grad_norm": 1.4775585686327108, "learning_rate": 3.073422908074398e-06, "loss": 0.6234, "step": 9954 }, { "epoch": 0.64, "grad_norm": 1.4953833010043254, "learning_rate": 3.072466431611768e-06, "loss": 0.5804, "step": 9955 }, { "epoch": 0.64, "grad_norm": 1.5714525697956336, "learning_rate": 3.071510037990482e-06, "loss": 0.6777, "step": 9956 }, { "epoch": 0.64, "grad_norm": 1.756081922304748, "learning_rate": 3.0705537272516406e-06, "loss": 0.6283, "step": 9957 }, { "epoch": 0.64, "grad_norm": 2.60491805751917, "learning_rate": 3.0695974994363487e-06, "loss": 0.644, "step": 9958 }, { "epoch": 0.64, "grad_norm": 1.3291441099240557, "learning_rate": 3.0686413545856975e-06, "loss": 0.6511, "step": 9959 }, { "epoch": 0.64, "grad_norm": 2.2144770704546994, "learning_rate": 3.067685292740784e-06, "loss": 0.7021, "step": 9960 }, { "epoch": 0.64, "grad_norm": 1.1786581607567728, "learning_rate": 3.0667293139426944e-06, "loss": 0.5787, "step": 9961 }, { "epoch": 0.64, "grad_norm": 1.671680700918395, "learning_rate": 3.065773418232517e-06, "loss": 0.6875, "step": 9962 }, { "epoch": 0.64, "grad_norm": 1.4978155056214755, "learning_rate": 3.064817605651336e-06, "loss": 0.6938, "step": 9963 }, { "epoch": 0.64, "grad_norm": 1.5026968781057413, "learning_rate": 3.063861876240225e-06, "loss": 0.6518, "step": 9964 }, { "epoch": 0.64, "grad_norm": 1.599343004638661, "learning_rate": 3.0629062300402636e-06, "loss": 0.6353, "step": 9965 }, { "epoch": 0.64, "grad_norm": 1.4670254982850102, "learning_rate": 3.0619506670925214e-06, "loss": 0.6224, "step": 9966 }, { "epoch": 0.64, "grad_norm": 1.1303851728671166, "learning_rate": 3.0609951874380685e-06, "loss": 0.6883, "step": 9967 }, { "epoch": 0.64, "grad_norm": 1.4383948923310608, "learning_rate": 3.0600397911179662e-06, "loss": 0.6414, "step": 9968 }, { "epoch": 0.64, "grad_norm": 1.3760642316491751, "learning_rate": 3.05908447817328e-06, "loss": 0.5837, "step": 9969 }, { "epoch": 0.64, "grad_norm": 1.188973623004409, "learning_rate": 3.0581292486450607e-06, "loss": 0.711, "step": 9970 }, { "epoch": 0.64, "grad_norm": 1.0103220411718963, "learning_rate": 3.057174102574367e-06, "loss": 0.6978, "step": 9971 }, { "epoch": 0.64, "grad_norm": 1.4938163583890667, "learning_rate": 3.0562190400022494e-06, "loss": 0.5984, "step": 9972 }, { "epoch": 0.64, "grad_norm": 1.123802177637954, "learning_rate": 3.0552640609697514e-06, "loss": 0.7819, "step": 9973 }, { "epoch": 0.64, "grad_norm": 1.545118649963644, "learning_rate": 3.0543091655179202e-06, "loss": 0.6807, "step": 9974 }, { "epoch": 0.64, "grad_norm": 1.5462829518379564, "learning_rate": 3.0533543536877897e-06, "loss": 0.643, "step": 9975 }, { "epoch": 0.64, "grad_norm": 1.4681240663597594, "learning_rate": 3.0523996255204014e-06, "loss": 0.6426, "step": 9976 }, { "epoch": 0.64, "grad_norm": 1.5870506339607917, "learning_rate": 3.0514449810567827e-06, "loss": 0.6898, "step": 9977 }, { "epoch": 0.64, "grad_norm": 1.4896661819384387, "learning_rate": 3.0504904203379647e-06, "loss": 0.715, "step": 9978 }, { "epoch": 0.64, "grad_norm": 1.4695896832775568, "learning_rate": 3.049535943404971e-06, "loss": 0.6795, "step": 9979 }, { "epoch": 0.64, "grad_norm": 1.3767719871783788, "learning_rate": 3.048581550298824e-06, "loss": 0.5913, "step": 9980 }, { "epoch": 0.64, "grad_norm": 1.6021646021990719, "learning_rate": 3.047627241060544e-06, "loss": 0.6398, "step": 9981 }, { "epoch": 0.64, "grad_norm": 1.958876346904143, "learning_rate": 3.0466730157311402e-06, "loss": 0.6523, "step": 9982 }, { "epoch": 0.64, "grad_norm": 1.5754134967251423, "learning_rate": 3.0457188743516275e-06, "loss": 0.6011, "step": 9983 }, { "epoch": 0.64, "grad_norm": 1.0594286615400839, "learning_rate": 3.0447648169630094e-06, "loss": 0.6464, "step": 9984 }, { "epoch": 0.64, "grad_norm": 1.374994165791574, "learning_rate": 3.043810843606292e-06, "loss": 0.6551, "step": 9985 }, { "epoch": 0.64, "grad_norm": 1.5965220930191797, "learning_rate": 3.0428569543224727e-06, "loss": 0.7389, "step": 9986 }, { "epoch": 0.64, "grad_norm": 1.515983918950328, "learning_rate": 3.041903149152552e-06, "loss": 0.6092, "step": 9987 }, { "epoch": 0.64, "grad_norm": 1.868665362686118, "learning_rate": 3.0409494281375163e-06, "loss": 0.6623, "step": 9988 }, { "epoch": 0.64, "grad_norm": 1.3636243568707602, "learning_rate": 3.039995791318358e-06, "loss": 0.5431, "step": 9989 }, { "epoch": 0.64, "grad_norm": 1.6542470450917528, "learning_rate": 3.039042238736064e-06, "loss": 0.7377, "step": 9990 }, { "epoch": 0.64, "grad_norm": 1.6595310022101104, "learning_rate": 3.038088770431612e-06, "loss": 0.6815, "step": 9991 }, { "epoch": 0.64, "grad_norm": 1.5173671154510027, "learning_rate": 3.037135386445983e-06, "loss": 0.7146, "step": 9992 }, { "epoch": 0.64, "grad_norm": 1.8220905413638093, "learning_rate": 3.0361820868201497e-06, "loss": 0.6557, "step": 9993 }, { "epoch": 0.64, "grad_norm": 1.4798862054073332, "learning_rate": 3.0352288715950864e-06, "loss": 0.6879, "step": 9994 }, { "epoch": 0.64, "grad_norm": 1.6636894973697216, "learning_rate": 3.0342757408117544e-06, "loss": 0.6098, "step": 9995 }, { "epoch": 0.64, "grad_norm": 1.5803733459740228, "learning_rate": 3.033322694511124e-06, "loss": 0.6603, "step": 9996 }, { "epoch": 0.64, "grad_norm": 1.578483840949159, "learning_rate": 3.0323697327341483e-06, "loss": 0.6633, "step": 9997 }, { "epoch": 0.64, "grad_norm": 1.5000718055179227, "learning_rate": 3.031416855521788e-06, "loss": 0.6446, "step": 9998 }, { "epoch": 0.64, "grad_norm": 1.5341324284430997, "learning_rate": 3.030464062914995e-06, "loss": 0.775, "step": 9999 }, { "epoch": 0.64, "grad_norm": 1.5473068561558576, "learning_rate": 3.0295113549547174e-06, "loss": 0.6479, "step": 10000 }, { "epoch": 0.64, "grad_norm": 1.5089414982565297, "learning_rate": 3.028558731681902e-06, "loss": 0.6952, "step": 10001 }, { "epoch": 0.64, "grad_norm": 1.6662428733880983, "learning_rate": 3.0276061931374882e-06, "loss": 0.7312, "step": 10002 }, { "epoch": 0.64, "grad_norm": 2.4728210574169136, "learning_rate": 3.0266537393624185e-06, "loss": 0.6881, "step": 10003 }, { "epoch": 0.64, "grad_norm": 1.6246015972227028, "learning_rate": 3.0257013703976225e-06, "loss": 0.7206, "step": 10004 }, { "epoch": 0.64, "grad_norm": 1.399005941030217, "learning_rate": 3.024749086284034e-06, "loss": 0.6867, "step": 10005 }, { "epoch": 0.64, "grad_norm": 1.0806053306752954, "learning_rate": 3.023796887062578e-06, "loss": 0.6113, "step": 10006 }, { "epoch": 0.64, "grad_norm": 1.8595504665956422, "learning_rate": 3.02284477277418e-06, "loss": 0.7098, "step": 10007 }, { "epoch": 0.64, "grad_norm": 1.556516772495353, "learning_rate": 3.021892743459758e-06, "loss": 0.6836, "step": 10008 }, { "epoch": 0.64, "grad_norm": 1.4719696122502897, "learning_rate": 3.020940799160229e-06, "loss": 0.6223, "step": 10009 }, { "epoch": 0.64, "grad_norm": 1.561626560542048, "learning_rate": 3.0199889399165084e-06, "loss": 0.6425, "step": 10010 }, { "epoch": 0.64, "grad_norm": 1.531956358653039, "learning_rate": 3.0190371657695005e-06, "loss": 0.6956, "step": 10011 }, { "epoch": 0.64, "grad_norm": 1.3582241085729578, "learning_rate": 3.0180854767601153e-06, "loss": 0.6629, "step": 10012 }, { "epoch": 0.64, "grad_norm": 1.621555620848077, "learning_rate": 3.017133872929249e-06, "loss": 0.6825, "step": 10013 }, { "epoch": 0.64, "grad_norm": 1.526569615019418, "learning_rate": 3.016182354317803e-06, "loss": 0.7063, "step": 10014 }, { "epoch": 0.64, "grad_norm": 1.5779376509018759, "learning_rate": 3.0152309209666703e-06, "loss": 0.751, "step": 10015 }, { "epoch": 0.64, "grad_norm": 1.483247381070116, "learning_rate": 3.014279572916744e-06, "loss": 0.6328, "step": 10016 }, { "epoch": 0.64, "grad_norm": 1.5726750163009222, "learning_rate": 3.0133283102089067e-06, "loss": 0.6817, "step": 10017 }, { "epoch": 0.64, "grad_norm": 1.7692751645791496, "learning_rate": 3.0123771328840447e-06, "loss": 0.6617, "step": 10018 }, { "epoch": 0.64, "grad_norm": 1.5914950958695537, "learning_rate": 3.0114260409830386e-06, "loss": 0.6155, "step": 10019 }, { "epoch": 0.64, "grad_norm": 1.7485868740630033, "learning_rate": 3.0104750345467603e-06, "loss": 0.6466, "step": 10020 }, { "epoch": 0.64, "grad_norm": 1.7508059039227801, "learning_rate": 3.009524113616086e-06, "loss": 0.6619, "step": 10021 }, { "epoch": 0.64, "grad_norm": 1.7035788470080786, "learning_rate": 3.008573278231881e-06, "loss": 0.7387, "step": 10022 }, { "epoch": 0.64, "grad_norm": 1.1058935913907955, "learning_rate": 3.0076225284350147e-06, "loss": 0.6862, "step": 10023 }, { "epoch": 0.64, "grad_norm": 1.4872970485291963, "learning_rate": 3.006671864266344e-06, "loss": 0.7471, "step": 10024 }, { "epoch": 0.64, "grad_norm": 1.4344518370672223, "learning_rate": 3.005721285766729e-06, "loss": 0.659, "step": 10025 }, { "epoch": 0.64, "grad_norm": 1.577530486924193, "learning_rate": 3.004770792977021e-06, "loss": 0.6118, "step": 10026 }, { "epoch": 0.64, "grad_norm": 1.8501985671774899, "learning_rate": 3.0038203859380717e-06, "loss": 0.6253, "step": 10027 }, { "epoch": 0.64, "grad_norm": 1.518373985296577, "learning_rate": 3.002870064690729e-06, "loss": 0.6505, "step": 10028 }, { "epoch": 0.64, "grad_norm": 1.5651693457890985, "learning_rate": 3.0019198292758327e-06, "loss": 0.7011, "step": 10029 }, { "epoch": 0.64, "grad_norm": 1.2694819128743777, "learning_rate": 3.000969679734226e-06, "loss": 0.6222, "step": 10030 }, { "epoch": 0.64, "grad_norm": 1.3527243657745698, "learning_rate": 3.0000196161067398e-06, "loss": 0.6213, "step": 10031 }, { "epoch": 0.64, "grad_norm": 1.6432033552787935, "learning_rate": 2.99906963843421e-06, "loss": 0.6813, "step": 10032 }, { "epoch": 0.64, "grad_norm": 1.554823918825263, "learning_rate": 2.998119746757461e-06, "loss": 0.7168, "step": 10033 }, { "epoch": 0.64, "grad_norm": 2.0580188798216232, "learning_rate": 2.9971699411173196e-06, "loss": 0.6098, "step": 10034 }, { "epoch": 0.64, "grad_norm": 1.4187044606538641, "learning_rate": 2.9962202215546043e-06, "loss": 0.6305, "step": 10035 }, { "epoch": 0.64, "grad_norm": 1.2440910721380556, "learning_rate": 2.995270588110134e-06, "loss": 0.6809, "step": 10036 }, { "epoch": 0.64, "grad_norm": 0.9501986745294947, "learning_rate": 2.994321040824722e-06, "loss": 0.5551, "step": 10037 }, { "epoch": 0.64, "grad_norm": 1.425951668973809, "learning_rate": 2.993371579739176e-06, "loss": 0.6514, "step": 10038 }, { "epoch": 0.64, "grad_norm": 1.4613357412161407, "learning_rate": 2.9924222048943046e-06, "loss": 0.6725, "step": 10039 }, { "epoch": 0.64, "grad_norm": 1.4747115720754973, "learning_rate": 2.991472916330906e-06, "loss": 0.7526, "step": 10040 }, { "epoch": 0.64, "grad_norm": 1.496053338238094, "learning_rate": 2.990523714089785e-06, "loss": 0.61, "step": 10041 }, { "epoch": 0.64, "grad_norm": 0.953966056022229, "learning_rate": 2.9895745982117287e-06, "loss": 0.6934, "step": 10042 }, { "epoch": 0.64, "grad_norm": 1.8754829603646122, "learning_rate": 2.988625568737532e-06, "loss": 0.7223, "step": 10043 }, { "epoch": 0.64, "grad_norm": 1.4852963013485159, "learning_rate": 2.987676625707982e-06, "loss": 0.615, "step": 10044 }, { "epoch": 0.64, "grad_norm": 1.5138097597806455, "learning_rate": 2.986727769163862e-06, "loss": 0.694, "step": 10045 }, { "epoch": 0.64, "grad_norm": 1.608985862504282, "learning_rate": 2.985778999145954e-06, "loss": 0.7267, "step": 10046 }, { "epoch": 0.64, "grad_norm": 1.570843781661911, "learning_rate": 2.9848303156950297e-06, "loss": 0.7166, "step": 10047 }, { "epoch": 0.64, "grad_norm": 1.1935366261552705, "learning_rate": 2.9838817188518664e-06, "loss": 0.5927, "step": 10048 }, { "epoch": 0.64, "grad_norm": 1.5592703332915738, "learning_rate": 2.982933208657228e-06, "loss": 0.7587, "step": 10049 }, { "epoch": 0.64, "grad_norm": 1.5419479911885905, "learning_rate": 2.9819847851518833e-06, "loss": 0.6707, "step": 10050 }, { "epoch": 0.64, "grad_norm": 1.9960864673155696, "learning_rate": 2.9810364483765913e-06, "loss": 0.8329, "step": 10051 }, { "epoch": 0.64, "grad_norm": 1.159511331080694, "learning_rate": 2.980088198372112e-06, "loss": 0.5724, "step": 10052 }, { "epoch": 0.64, "grad_norm": 1.1161758164202942, "learning_rate": 2.9791400351791954e-06, "loss": 0.6071, "step": 10053 }, { "epoch": 0.64, "grad_norm": 1.6050377649337149, "learning_rate": 2.978191958838594e-06, "loss": 0.7303, "step": 10054 }, { "epoch": 0.64, "grad_norm": 1.7388660183469167, "learning_rate": 2.9772439693910554e-06, "loss": 0.6837, "step": 10055 }, { "epoch": 0.64, "grad_norm": 1.7059527371318863, "learning_rate": 2.9762960668773187e-06, "loss": 0.6133, "step": 10056 }, { "epoch": 0.64, "grad_norm": 1.4307686086934355, "learning_rate": 2.975348251338126e-06, "loss": 0.6038, "step": 10057 }, { "epoch": 0.64, "grad_norm": 1.7166141185020805, "learning_rate": 2.97440052281421e-06, "loss": 0.6301, "step": 10058 }, { "epoch": 0.64, "grad_norm": 2.0233891763706082, "learning_rate": 2.973452881346305e-06, "loss": 0.7051, "step": 10059 }, { "epoch": 0.64, "grad_norm": 1.6914426891533847, "learning_rate": 2.9725053269751348e-06, "loss": 0.6738, "step": 10060 }, { "epoch": 0.64, "grad_norm": 0.9608271534051274, "learning_rate": 2.9715578597414268e-06, "loss": 0.5365, "step": 10061 }, { "epoch": 0.64, "grad_norm": 1.409535311150185, "learning_rate": 2.970610479685898e-06, "loss": 0.6437, "step": 10062 }, { "epoch": 0.64, "grad_norm": 1.5641825937471499, "learning_rate": 2.9696631868492664e-06, "loss": 0.729, "step": 10063 }, { "epoch": 0.64, "grad_norm": 1.1089225863095598, "learning_rate": 2.9687159812722465e-06, "loss": 0.6312, "step": 10064 }, { "epoch": 0.64, "grad_norm": 1.0426386936113914, "learning_rate": 2.9677688629955438e-06, "loss": 0.7202, "step": 10065 }, { "epoch": 0.64, "grad_norm": 1.541944466210906, "learning_rate": 2.9668218320598667e-06, "loss": 0.6309, "step": 10066 }, { "epoch": 0.64, "grad_norm": 1.533726002567336, "learning_rate": 2.965874888505913e-06, "loss": 0.7395, "step": 10067 }, { "epoch": 0.64, "grad_norm": 1.490517346147684, "learning_rate": 2.964928032374385e-06, "loss": 0.6327, "step": 10068 }, { "epoch": 0.64, "grad_norm": 1.5309595133745648, "learning_rate": 2.9639812637059717e-06, "loss": 0.6546, "step": 10069 }, { "epoch": 0.64, "grad_norm": 1.5405175483425997, "learning_rate": 2.963034582541366e-06, "loss": 0.6044, "step": 10070 }, { "epoch": 0.64, "grad_norm": 1.9229932128633616, "learning_rate": 2.962087988921253e-06, "loss": 0.7184, "step": 10071 }, { "epoch": 0.64, "grad_norm": 1.093090743600758, "learning_rate": 2.9611414828863177e-06, "loss": 0.6053, "step": 10072 }, { "epoch": 0.64, "grad_norm": 2.4082717970407503, "learning_rate": 2.9601950644772352e-06, "loss": 0.7369, "step": 10073 }, { "epoch": 0.64, "grad_norm": 1.3183163821013397, "learning_rate": 2.959248733734683e-06, "loss": 0.6197, "step": 10074 }, { "epoch": 0.64, "grad_norm": 1.7331067900472754, "learning_rate": 2.958302490699334e-06, "loss": 0.6363, "step": 10075 }, { "epoch": 0.64, "grad_norm": 1.7686124691001945, "learning_rate": 2.957356335411852e-06, "loss": 0.6811, "step": 10076 }, { "epoch": 0.64, "grad_norm": 1.5116280221190135, "learning_rate": 2.9564102679129027e-06, "loss": 0.7028, "step": 10077 }, { "epoch": 0.65, "grad_norm": 1.5755386176202875, "learning_rate": 2.9554642882431463e-06, "loss": 0.7285, "step": 10078 }, { "epoch": 0.65, "grad_norm": 3.6227263131249696, "learning_rate": 2.954518396443239e-06, "loss": 0.6577, "step": 10079 }, { "epoch": 0.65, "grad_norm": 1.6569073603628832, "learning_rate": 2.9535725925538313e-06, "loss": 0.7381, "step": 10080 }, { "epoch": 0.65, "grad_norm": 1.5614545558958732, "learning_rate": 2.9526268766155753e-06, "loss": 0.671, "step": 10081 }, { "epoch": 0.65, "grad_norm": 1.3773612950695637, "learning_rate": 2.9516812486691126e-06, "loss": 0.7192, "step": 10082 }, { "epoch": 0.65, "grad_norm": 1.3746364955167247, "learning_rate": 2.9507357087550857e-06, "loss": 0.6428, "step": 10083 }, { "epoch": 0.65, "grad_norm": 1.5327448944220088, "learning_rate": 2.9497902569141335e-06, "loss": 0.6227, "step": 10084 }, { "epoch": 0.65, "grad_norm": 1.5276441454975291, "learning_rate": 2.948844893186886e-06, "loss": 0.6887, "step": 10085 }, { "epoch": 0.65, "grad_norm": 1.7037358185744114, "learning_rate": 2.9478996176139765e-06, "loss": 0.673, "step": 10086 }, { "epoch": 0.65, "grad_norm": 5.983744255688134, "learning_rate": 2.9469544302360283e-06, "loss": 0.6629, "step": 10087 }, { "epoch": 0.65, "grad_norm": 1.6253282632010908, "learning_rate": 2.946009331093666e-06, "loss": 0.7059, "step": 10088 }, { "epoch": 0.65, "grad_norm": 1.6215217764293204, "learning_rate": 2.945064320227505e-06, "loss": 0.5831, "step": 10089 }, { "epoch": 0.65, "grad_norm": 1.609362308062145, "learning_rate": 2.9441193976781637e-06, "loss": 0.618, "step": 10090 }, { "epoch": 0.65, "grad_norm": 1.4852237459793884, "learning_rate": 2.9431745634862484e-06, "loss": 0.688, "step": 10091 }, { "epoch": 0.65, "grad_norm": 1.0445962409974434, "learning_rate": 2.942229817692368e-06, "loss": 0.6098, "step": 10092 }, { "epoch": 0.65, "grad_norm": 1.55050455646917, "learning_rate": 2.9412851603371277e-06, "loss": 0.6537, "step": 10093 }, { "epoch": 0.65, "grad_norm": 1.683490419209962, "learning_rate": 2.9403405914611243e-06, "loss": 0.7158, "step": 10094 }, { "epoch": 0.65, "grad_norm": 1.138293924417641, "learning_rate": 2.9393961111049564e-06, "loss": 0.6197, "step": 10095 }, { "epoch": 0.65, "grad_norm": 1.3547806204628106, "learning_rate": 2.938451719309211e-06, "loss": 0.7649, "step": 10096 }, { "epoch": 0.65, "grad_norm": 1.4260264935335265, "learning_rate": 2.937507416114481e-06, "loss": 0.5911, "step": 10097 }, { "epoch": 0.65, "grad_norm": 1.4863360635379033, "learning_rate": 2.9365632015613467e-06, "loss": 0.6262, "step": 10098 }, { "epoch": 0.65, "grad_norm": 1.6690903780462794, "learning_rate": 2.9356190756903913e-06, "loss": 0.6596, "step": 10099 }, { "epoch": 0.65, "grad_norm": 1.4600279371980556, "learning_rate": 2.9346750385421887e-06, "loss": 0.6356, "step": 10100 }, { "epoch": 0.65, "grad_norm": 2.0539570706827823, "learning_rate": 2.9337310901573134e-06, "loss": 0.6635, "step": 10101 }, { "epoch": 0.65, "grad_norm": 1.0723235821889043, "learning_rate": 2.932787230576336e-06, "loss": 0.6425, "step": 10102 }, { "epoch": 0.65, "grad_norm": 1.6041527145912928, "learning_rate": 2.9318434598398173e-06, "loss": 0.6958, "step": 10103 }, { "epoch": 0.65, "grad_norm": 1.5228850825822329, "learning_rate": 2.9308997779883232e-06, "loss": 0.7028, "step": 10104 }, { "epoch": 0.65, "grad_norm": 1.6226545952969231, "learning_rate": 2.929956185062407e-06, "loss": 0.6116, "step": 10105 }, { "epoch": 0.65, "grad_norm": 1.7230461240944617, "learning_rate": 2.929012681102625e-06, "loss": 0.7176, "step": 10106 }, { "epoch": 0.65, "grad_norm": 1.6234939822153533, "learning_rate": 2.9280692661495247e-06, "loss": 0.6849, "step": 10107 }, { "epoch": 0.65, "grad_norm": 1.4551962055317231, "learning_rate": 2.927125940243656e-06, "loss": 0.6012, "step": 10108 }, { "epoch": 0.65, "grad_norm": 1.6421469589954574, "learning_rate": 2.9261827034255575e-06, "loss": 0.7402, "step": 10109 }, { "epoch": 0.65, "grad_norm": 1.0549738068442376, "learning_rate": 2.9252395557357675e-06, "loss": 0.5356, "step": 10110 }, { "epoch": 0.65, "grad_norm": 1.4085598262798888, "learning_rate": 2.9242964972148245e-06, "loss": 0.642, "step": 10111 }, { "epoch": 0.65, "grad_norm": 1.3477413600564447, "learning_rate": 2.923353527903254e-06, "loss": 0.5992, "step": 10112 }, { "epoch": 0.65, "grad_norm": 1.7267559495960538, "learning_rate": 2.922410647841586e-06, "loss": 0.6324, "step": 10113 }, { "epoch": 0.65, "grad_norm": 1.525746720961804, "learning_rate": 2.9214678570703436e-06, "loss": 0.6447, "step": 10114 }, { "epoch": 0.65, "grad_norm": 1.6744260617239293, "learning_rate": 2.920525155630044e-06, "loss": 0.7659, "step": 10115 }, { "epoch": 0.65, "grad_norm": 1.5751234832390733, "learning_rate": 2.9195825435612036e-06, "loss": 0.7047, "step": 10116 }, { "epoch": 0.65, "grad_norm": 1.0572575804362019, "learning_rate": 2.918640020904334e-06, "loss": 0.6319, "step": 10117 }, { "epoch": 0.65, "grad_norm": 1.6233310807162715, "learning_rate": 2.9176975876999434e-06, "loss": 0.7472, "step": 10118 }, { "epoch": 0.65, "grad_norm": 1.2303452472301466, "learning_rate": 2.9167552439885333e-06, "loss": 0.6283, "step": 10119 }, { "epoch": 0.65, "grad_norm": 1.7366141603580056, "learning_rate": 2.915812989810609e-06, "loss": 0.6223, "step": 10120 }, { "epoch": 0.65, "grad_norm": 1.4860691014541367, "learning_rate": 2.914870825206659e-06, "loss": 0.7639, "step": 10121 }, { "epoch": 0.65, "grad_norm": 1.7009145321446613, "learning_rate": 2.913928750217183e-06, "loss": 0.6032, "step": 10122 }, { "epoch": 0.65, "grad_norm": 1.4164567840010938, "learning_rate": 2.9129867648826623e-06, "loss": 0.6549, "step": 10123 }, { "epoch": 0.65, "grad_norm": 1.3323814350946803, "learning_rate": 2.9120448692435866e-06, "loss": 0.6065, "step": 10124 }, { "epoch": 0.65, "grad_norm": 1.6037146346532443, "learning_rate": 2.9111030633404354e-06, "loss": 0.6864, "step": 10125 }, { "epoch": 0.65, "grad_norm": 1.59224797874587, "learning_rate": 2.9101613472136846e-06, "loss": 0.7489, "step": 10126 }, { "epoch": 0.65, "grad_norm": 1.4749238303236658, "learning_rate": 2.9092197209038087e-06, "loss": 0.6007, "step": 10127 }, { "epoch": 0.65, "grad_norm": 1.5633403380247015, "learning_rate": 2.908278184451273e-06, "loss": 0.702, "step": 10128 }, { "epoch": 0.65, "grad_norm": 1.539474295378513, "learning_rate": 2.90733673789655e-06, "loss": 0.648, "step": 10129 }, { "epoch": 0.65, "grad_norm": 1.51626971623323, "learning_rate": 2.9063953812800925e-06, "loss": 0.6571, "step": 10130 }, { "epoch": 0.65, "grad_norm": 1.4073963047924327, "learning_rate": 2.905454114642365e-06, "loss": 0.6485, "step": 10131 }, { "epoch": 0.65, "grad_norm": 1.1441958352295702, "learning_rate": 2.9045129380238177e-06, "loss": 0.6672, "step": 10132 }, { "epoch": 0.65, "grad_norm": 1.171803206494873, "learning_rate": 2.9035718514649013e-06, "loss": 0.5314, "step": 10133 }, { "epoch": 0.65, "grad_norm": 1.5641539696557205, "learning_rate": 2.902630855006061e-06, "loss": 0.6735, "step": 10134 }, { "epoch": 0.65, "grad_norm": 1.4014043336653341, "learning_rate": 2.9016899486877404e-06, "loss": 0.6146, "step": 10135 }, { "epoch": 0.65, "grad_norm": 1.4602194066980467, "learning_rate": 2.9007491325503757e-06, "loss": 0.6758, "step": 10136 }, { "epoch": 0.65, "grad_norm": 1.1501474388699409, "learning_rate": 2.8998084066344005e-06, "loss": 0.6563, "step": 10137 }, { "epoch": 0.65, "grad_norm": 1.769892590686504, "learning_rate": 2.8988677709802514e-06, "loss": 0.691, "step": 10138 }, { "epoch": 0.65, "grad_norm": 1.3296523965980256, "learning_rate": 2.8979272256283453e-06, "loss": 0.6, "step": 10139 }, { "epoch": 0.65, "grad_norm": 1.9366367521791583, "learning_rate": 2.896986770619113e-06, "loss": 0.74, "step": 10140 }, { "epoch": 0.65, "grad_norm": 1.7217525894908317, "learning_rate": 2.89604640599297e-06, "loss": 0.6642, "step": 10141 }, { "epoch": 0.65, "grad_norm": 1.7459074568838717, "learning_rate": 2.8951061317903312e-06, "loss": 0.6599, "step": 10142 }, { "epoch": 0.65, "grad_norm": 1.3899563534648076, "learning_rate": 2.8941659480516083e-06, "loss": 0.6154, "step": 10143 }, { "epoch": 0.65, "grad_norm": 1.4292832967022127, "learning_rate": 2.8932258548172077e-06, "loss": 0.7256, "step": 10144 }, { "epoch": 0.65, "grad_norm": 1.5669138983164153, "learning_rate": 2.892285852127532e-06, "loss": 0.6331, "step": 10145 }, { "epoch": 0.65, "grad_norm": 1.6291762127473763, "learning_rate": 2.8913459400229825e-06, "loss": 0.674, "step": 10146 }, { "epoch": 0.65, "grad_norm": 1.333796280072353, "learning_rate": 2.8904061185439513e-06, "loss": 0.6757, "step": 10147 }, { "epoch": 0.65, "grad_norm": 1.1050408314205673, "learning_rate": 2.889466387730835e-06, "loss": 0.6936, "step": 10148 }, { "epoch": 0.65, "grad_norm": 1.5163229710608321, "learning_rate": 2.8885267476240173e-06, "loss": 0.6177, "step": 10149 }, { "epoch": 0.65, "grad_norm": 1.6028028796593432, "learning_rate": 2.8875871982638843e-06, "loss": 0.7186, "step": 10150 }, { "epoch": 0.65, "grad_norm": 1.5093821366655895, "learning_rate": 2.8866477396908143e-06, "loss": 0.7389, "step": 10151 }, { "epoch": 0.65, "grad_norm": 1.650824939082477, "learning_rate": 2.8857083719451835e-06, "loss": 0.6463, "step": 10152 }, { "epoch": 0.65, "grad_norm": 1.4082995154716837, "learning_rate": 2.8847690950673642e-06, "loss": 0.7147, "step": 10153 }, { "epoch": 0.65, "grad_norm": 1.6633038715787973, "learning_rate": 2.8838299090977224e-06, "loss": 0.6667, "step": 10154 }, { "epoch": 0.65, "grad_norm": 1.7117959054352332, "learning_rate": 2.882890814076629e-06, "loss": 0.6759, "step": 10155 }, { "epoch": 0.65, "grad_norm": 1.5232433817710203, "learning_rate": 2.881951810044435e-06, "loss": 0.6664, "step": 10156 }, { "epoch": 0.65, "grad_norm": 1.561453023804464, "learning_rate": 2.8810128970415035e-06, "loss": 0.6471, "step": 10157 }, { "epoch": 0.65, "grad_norm": 1.7972478881707712, "learning_rate": 2.880074075108186e-06, "loss": 0.628, "step": 10158 }, { "epoch": 0.65, "grad_norm": 1.36347637692045, "learning_rate": 2.879135344284829e-06, "loss": 0.6098, "step": 10159 }, { "epoch": 0.65, "grad_norm": 3.6678457744830686, "learning_rate": 2.8781967046117794e-06, "loss": 0.6541, "step": 10160 }, { "epoch": 0.65, "grad_norm": 1.6023566747520572, "learning_rate": 2.8772581561293743e-06, "loss": 0.6672, "step": 10161 }, { "epoch": 0.65, "grad_norm": 1.080165612740687, "learning_rate": 2.8763196988779573e-06, "loss": 0.6068, "step": 10162 }, { "epoch": 0.65, "grad_norm": 1.677418251336404, "learning_rate": 2.875381332897853e-06, "loss": 0.6403, "step": 10163 }, { "epoch": 0.65, "grad_norm": 1.1173642316617145, "learning_rate": 2.874443058229399e-06, "loss": 0.6618, "step": 10164 }, { "epoch": 0.65, "grad_norm": 1.5266938194887048, "learning_rate": 2.873504874912911e-06, "loss": 0.643, "step": 10165 }, { "epoch": 0.65, "grad_norm": 1.5191272377224494, "learning_rate": 2.872566782988718e-06, "loss": 0.6591, "step": 10166 }, { "epoch": 0.65, "grad_norm": 2.0121272013507965, "learning_rate": 2.8716287824971344e-06, "loss": 0.6301, "step": 10167 }, { "epoch": 0.65, "grad_norm": 1.5277114285539755, "learning_rate": 2.8706908734784713e-06, "loss": 0.6671, "step": 10168 }, { "epoch": 0.65, "grad_norm": 1.5846678608872227, "learning_rate": 2.869753055973044e-06, "loss": 0.6557, "step": 10169 }, { "epoch": 0.65, "grad_norm": 1.0606276644834018, "learning_rate": 2.8688153300211503e-06, "loss": 0.6509, "step": 10170 }, { "epoch": 0.65, "grad_norm": 1.381371351425361, "learning_rate": 2.8678776956630995e-06, "loss": 0.6771, "step": 10171 }, { "epoch": 0.65, "grad_norm": 1.5709831351087948, "learning_rate": 2.8669401529391804e-06, "loss": 0.6965, "step": 10172 }, { "epoch": 0.65, "grad_norm": 1.059821782696454, "learning_rate": 2.866002701889694e-06, "loss": 0.6187, "step": 10173 }, { "epoch": 0.65, "grad_norm": 1.5562238417921526, "learning_rate": 2.865065342554927e-06, "loss": 0.6204, "step": 10174 }, { "epoch": 0.65, "grad_norm": 1.9460712778729354, "learning_rate": 2.8641280749751655e-06, "loss": 0.7181, "step": 10175 }, { "epoch": 0.65, "grad_norm": 1.4369540321586278, "learning_rate": 2.8631908991906913e-06, "loss": 0.5752, "step": 10176 }, { "epoch": 0.65, "grad_norm": 1.5285391070749872, "learning_rate": 2.8622538152417804e-06, "loss": 0.6443, "step": 10177 }, { "epoch": 0.65, "grad_norm": 1.109081113986175, "learning_rate": 2.8613168231687126e-06, "loss": 0.7077, "step": 10178 }, { "epoch": 0.65, "grad_norm": 1.975821129198486, "learning_rate": 2.8603799230117495e-06, "loss": 0.631, "step": 10179 }, { "epoch": 0.65, "grad_norm": 1.5253612224371758, "learning_rate": 2.8594431148111658e-06, "loss": 0.6993, "step": 10180 }, { "epoch": 0.65, "grad_norm": 1.8603896938500715, "learning_rate": 2.858506398607214e-06, "loss": 0.5982, "step": 10181 }, { "epoch": 0.65, "grad_norm": 1.500448030446988, "learning_rate": 2.8575697744401596e-06, "loss": 0.6009, "step": 10182 }, { "epoch": 0.65, "grad_norm": 1.8090069550401382, "learning_rate": 2.8566332423502553e-06, "loss": 0.6494, "step": 10183 }, { "epoch": 0.65, "grad_norm": 1.527038807023037, "learning_rate": 2.8556968023777476e-06, "loss": 0.6887, "step": 10184 }, { "epoch": 0.65, "grad_norm": 1.61699600171167, "learning_rate": 2.8547604545628903e-06, "loss": 0.6332, "step": 10185 }, { "epoch": 0.65, "grad_norm": 1.749971472634587, "learning_rate": 2.8538241989459164e-06, "loss": 0.631, "step": 10186 }, { "epoch": 0.65, "grad_norm": 1.6104505484545073, "learning_rate": 2.852888035567073e-06, "loss": 0.6297, "step": 10187 }, { "epoch": 0.65, "grad_norm": 1.5941071890752103, "learning_rate": 2.8519519644665853e-06, "loss": 0.5886, "step": 10188 }, { "epoch": 0.65, "grad_norm": 1.4201327643273318, "learning_rate": 2.851015985684691e-06, "loss": 0.5698, "step": 10189 }, { "epoch": 0.65, "grad_norm": 1.591305405002114, "learning_rate": 2.850080099261614e-06, "loss": 0.7186, "step": 10190 }, { "epoch": 0.65, "grad_norm": 1.4203347914148878, "learning_rate": 2.8491443052375767e-06, "loss": 0.6679, "step": 10191 }, { "epoch": 0.65, "grad_norm": 1.6471135870526554, "learning_rate": 2.8482086036527978e-06, "loss": 0.6208, "step": 10192 }, { "epoch": 0.65, "grad_norm": 1.5692343182591564, "learning_rate": 2.84727299454749e-06, "loss": 0.6658, "step": 10193 }, { "epoch": 0.65, "grad_norm": 1.6943095671535533, "learning_rate": 2.8463374779618687e-06, "loss": 0.737, "step": 10194 }, { "epoch": 0.65, "grad_norm": 2.1564173460345906, "learning_rate": 2.845402053936133e-06, "loss": 0.6578, "step": 10195 }, { "epoch": 0.65, "grad_norm": 1.5371945636751638, "learning_rate": 2.8444667225104917e-06, "loss": 0.5762, "step": 10196 }, { "epoch": 0.65, "grad_norm": 1.554314781164039, "learning_rate": 2.8435314837251414e-06, "loss": 0.5918, "step": 10197 }, { "epoch": 0.65, "grad_norm": 1.5977661437366775, "learning_rate": 2.842596337620277e-06, "loss": 0.6781, "step": 10198 }, { "epoch": 0.65, "grad_norm": 1.571522286458811, "learning_rate": 2.841661284236088e-06, "loss": 0.6693, "step": 10199 }, { "epoch": 0.65, "grad_norm": 1.41947674500887, "learning_rate": 2.840726323612762e-06, "loss": 0.6622, "step": 10200 }, { "epoch": 0.65, "grad_norm": 1.7149037746878308, "learning_rate": 2.8397914557904816e-06, "loss": 0.6688, "step": 10201 }, { "epoch": 0.65, "grad_norm": 1.348576359931186, "learning_rate": 2.8388566808094226e-06, "loss": 0.5917, "step": 10202 }, { "epoch": 0.65, "grad_norm": 1.5558697543327213, "learning_rate": 2.837921998709765e-06, "loss": 0.6268, "step": 10203 }, { "epoch": 0.65, "grad_norm": 1.504844856172974, "learning_rate": 2.8369874095316763e-06, "loss": 0.6741, "step": 10204 }, { "epoch": 0.65, "grad_norm": 1.314147130177445, "learning_rate": 2.8360529133153237e-06, "loss": 0.6601, "step": 10205 }, { "epoch": 0.65, "grad_norm": 1.7653476856780332, "learning_rate": 2.8351185101008695e-06, "loss": 0.7343, "step": 10206 }, { "epoch": 0.65, "grad_norm": 1.39573278274052, "learning_rate": 2.8341841999284725e-06, "loss": 0.7055, "step": 10207 }, { "epoch": 0.65, "grad_norm": 1.7677997245315429, "learning_rate": 2.8332499828382887e-06, "loss": 0.7428, "step": 10208 }, { "epoch": 0.65, "grad_norm": 1.6051514601063452, "learning_rate": 2.832315858870467e-06, "loss": 0.6603, "step": 10209 }, { "epoch": 0.65, "grad_norm": 1.5379952895628333, "learning_rate": 2.8313818280651528e-06, "loss": 0.7071, "step": 10210 }, { "epoch": 0.65, "grad_norm": 1.5228447522438866, "learning_rate": 2.830447890462492e-06, "loss": 0.665, "step": 10211 }, { "epoch": 0.65, "grad_norm": 1.488628148302286, "learning_rate": 2.8295140461026232e-06, "loss": 0.7147, "step": 10212 }, { "epoch": 0.65, "grad_norm": 1.4910836734725443, "learning_rate": 2.828580295025679e-06, "loss": 0.643, "step": 10213 }, { "epoch": 0.65, "grad_norm": 1.440704984887936, "learning_rate": 2.8276466372717916e-06, "loss": 0.6953, "step": 10214 }, { "epoch": 0.65, "grad_norm": 1.6124677756390593, "learning_rate": 2.8267130728810866e-06, "loss": 0.6516, "step": 10215 }, { "epoch": 0.65, "grad_norm": 1.3980454221397636, "learning_rate": 2.8257796018936872e-06, "loss": 0.601, "step": 10216 }, { "epoch": 0.65, "grad_norm": 1.6266667269523096, "learning_rate": 2.824846224349712e-06, "loss": 0.6636, "step": 10217 }, { "epoch": 0.65, "grad_norm": 1.3879953428860001, "learning_rate": 2.823912940289275e-06, "loss": 0.7038, "step": 10218 }, { "epoch": 0.65, "grad_norm": 1.4813108241175106, "learning_rate": 2.822979749752486e-06, "loss": 0.6748, "step": 10219 }, { "epoch": 0.65, "grad_norm": 1.5435624541557988, "learning_rate": 2.8220466527794555e-06, "loss": 0.7139, "step": 10220 }, { "epoch": 0.65, "grad_norm": 1.7895788763425577, "learning_rate": 2.821113649410281e-06, "loss": 0.6654, "step": 10221 }, { "epoch": 0.65, "grad_norm": 1.360780004223033, "learning_rate": 2.820180739685064e-06, "loss": 0.6069, "step": 10222 }, { "epoch": 0.65, "grad_norm": 2.6579544722271478, "learning_rate": 2.8192479236438987e-06, "loss": 0.6384, "step": 10223 }, { "epoch": 0.65, "grad_norm": 1.4763380779018795, "learning_rate": 2.8183152013268757e-06, "loss": 0.5889, "step": 10224 }, { "epoch": 0.65, "grad_norm": 1.5658730740057771, "learning_rate": 2.8173825727740808e-06, "loss": 0.6883, "step": 10225 }, { "epoch": 0.65, "grad_norm": 1.5514350557265093, "learning_rate": 2.8164500380255942e-06, "loss": 0.654, "step": 10226 }, { "epoch": 0.65, "grad_norm": 1.849103669617276, "learning_rate": 2.815517597121501e-06, "loss": 0.7946, "step": 10227 }, { "epoch": 0.65, "grad_norm": 2.0542542979383596, "learning_rate": 2.8145852501018678e-06, "loss": 0.8237, "step": 10228 }, { "epoch": 0.65, "grad_norm": 1.5540275182075691, "learning_rate": 2.8136529970067715e-06, "loss": 0.6323, "step": 10229 }, { "epoch": 0.65, "grad_norm": 1.6019124754313374, "learning_rate": 2.8127208378762715e-06, "loss": 0.6447, "step": 10230 }, { "epoch": 0.65, "grad_norm": 1.6572911561481791, "learning_rate": 2.8117887727504355e-06, "loss": 0.7203, "step": 10231 }, { "epoch": 0.65, "grad_norm": 1.556704075538306, "learning_rate": 2.8108568016693203e-06, "loss": 0.6162, "step": 10232 }, { "epoch": 0.65, "grad_norm": 1.6851758413399165, "learning_rate": 2.809924924672978e-06, "loss": 0.6872, "step": 10233 }, { "epoch": 0.66, "grad_norm": 1.535534526457462, "learning_rate": 2.808993141801465e-06, "loss": 0.6331, "step": 10234 }, { "epoch": 0.66, "grad_norm": 1.5659269977934447, "learning_rate": 2.8080614530948174e-06, "loss": 0.6552, "step": 10235 }, { "epoch": 0.66, "grad_norm": 1.6025845676952835, "learning_rate": 2.8071298585930873e-06, "loss": 0.7094, "step": 10236 }, { "epoch": 0.66, "grad_norm": 1.5217881491604182, "learning_rate": 2.806198358336304e-06, "loss": 0.7278, "step": 10237 }, { "epoch": 0.66, "grad_norm": 1.6200292449574083, "learning_rate": 2.8052669523645075e-06, "loss": 0.645, "step": 10238 }, { "epoch": 0.66, "grad_norm": 1.622072492821296, "learning_rate": 2.804335640717726e-06, "loss": 0.6049, "step": 10239 }, { "epoch": 0.66, "grad_norm": 1.4121018914781982, "learning_rate": 2.8034044234359824e-06, "loss": 0.6413, "step": 10240 }, { "epoch": 0.66, "grad_norm": 2.0477756924608257, "learning_rate": 2.802473300559305e-06, "loss": 0.7431, "step": 10241 }, { "epoch": 0.66, "grad_norm": 1.516881301542089, "learning_rate": 2.8015422721277036e-06, "loss": 0.6275, "step": 10242 }, { "epoch": 0.66, "grad_norm": 1.7643103846901176, "learning_rate": 2.8006113381812e-06, "loss": 0.6872, "step": 10243 }, { "epoch": 0.66, "grad_norm": 1.4477780058923404, "learning_rate": 2.799680498759795e-06, "loss": 0.693, "step": 10244 }, { "epoch": 0.66, "grad_norm": 1.423778057065856, "learning_rate": 2.798749753903501e-06, "loss": 0.6281, "step": 10245 }, { "epoch": 0.66, "grad_norm": 1.6341840366295246, "learning_rate": 2.797819103652317e-06, "loss": 0.8531, "step": 10246 }, { "epoch": 0.66, "grad_norm": 1.592578256088956, "learning_rate": 2.7968885480462406e-06, "loss": 0.6524, "step": 10247 }, { "epoch": 0.66, "grad_norm": 1.5574522487072642, "learning_rate": 2.7959580871252657e-06, "loss": 0.649, "step": 10248 }, { "epoch": 0.66, "grad_norm": 1.433725732404011, "learning_rate": 2.7950277209293774e-06, "loss": 0.6251, "step": 10249 }, { "epoch": 0.66, "grad_norm": 1.5482800093208833, "learning_rate": 2.7940974494985695e-06, "loss": 0.6966, "step": 10250 }, { "epoch": 0.66, "grad_norm": 1.6509460448396314, "learning_rate": 2.7931672728728137e-06, "loss": 0.7065, "step": 10251 }, { "epoch": 0.66, "grad_norm": 1.4052915261542556, "learning_rate": 2.7922371910920933e-06, "loss": 0.6454, "step": 10252 }, { "epoch": 0.66, "grad_norm": 1.5769791186154527, "learning_rate": 2.7913072041963797e-06, "loss": 0.7826, "step": 10253 }, { "epoch": 0.66, "grad_norm": 1.4263904848693583, "learning_rate": 2.7903773122256405e-06, "loss": 0.6699, "step": 10254 }, { "epoch": 0.66, "grad_norm": 1.5833440448758056, "learning_rate": 2.789447515219842e-06, "loss": 0.6358, "step": 10255 }, { "epoch": 0.66, "grad_norm": 1.5396639941584402, "learning_rate": 2.788517813218944e-06, "loss": 0.6439, "step": 10256 }, { "epoch": 0.66, "grad_norm": 1.5785253075336263, "learning_rate": 2.7875882062629033e-06, "loss": 0.5881, "step": 10257 }, { "epoch": 0.66, "grad_norm": 1.5664390481158132, "learning_rate": 2.786658694391671e-06, "loss": 0.5686, "step": 10258 }, { "epoch": 0.66, "grad_norm": 1.0673194191864372, "learning_rate": 2.7857292776452003e-06, "loss": 0.6371, "step": 10259 }, { "epoch": 0.66, "grad_norm": 1.525412175466259, "learning_rate": 2.784799956063429e-06, "loss": 0.7439, "step": 10260 }, { "epoch": 0.66, "grad_norm": 1.5759719502766472, "learning_rate": 2.783870729686303e-06, "loss": 0.7098, "step": 10261 }, { "epoch": 0.66, "grad_norm": 1.5698580012668488, "learning_rate": 2.7829415985537567e-06, "loss": 0.6964, "step": 10262 }, { "epoch": 0.66, "grad_norm": 1.5231178206066518, "learning_rate": 2.782012562705721e-06, "loss": 0.6153, "step": 10263 }, { "epoch": 0.66, "grad_norm": 1.5541037740037371, "learning_rate": 2.781083622182125e-06, "loss": 0.6856, "step": 10264 }, { "epoch": 0.66, "grad_norm": 1.3998698609777258, "learning_rate": 2.780154777022892e-06, "loss": 0.63, "step": 10265 }, { "epoch": 0.66, "grad_norm": 1.054549859946416, "learning_rate": 2.779226027267943e-06, "loss": 0.6744, "step": 10266 }, { "epoch": 0.66, "grad_norm": 1.5035321212127635, "learning_rate": 2.77829737295719e-06, "loss": 0.6155, "step": 10267 }, { "epoch": 0.66, "grad_norm": 1.4022461695047437, "learning_rate": 2.77736881413055e-06, "loss": 0.6267, "step": 10268 }, { "epoch": 0.66, "grad_norm": 1.492236950879918, "learning_rate": 2.7764403508279275e-06, "loss": 0.6666, "step": 10269 }, { "epoch": 0.66, "grad_norm": 1.457479170489369, "learning_rate": 2.7755119830892263e-06, "loss": 0.7222, "step": 10270 }, { "epoch": 0.66, "grad_norm": 1.4382191694802724, "learning_rate": 2.774583710954346e-06, "loss": 0.6816, "step": 10271 }, { "epoch": 0.66, "grad_norm": 1.478467925592203, "learning_rate": 2.7736555344631823e-06, "loss": 0.6462, "step": 10272 }, { "epoch": 0.66, "grad_norm": 1.1855920042897008, "learning_rate": 2.7727274536556248e-06, "loss": 0.6713, "step": 10273 }, { "epoch": 0.66, "grad_norm": 1.4625525118592722, "learning_rate": 2.771799468571561e-06, "loss": 0.6369, "step": 10274 }, { "epoch": 0.66, "grad_norm": 1.5773258303489792, "learning_rate": 2.770871579250872e-06, "loss": 0.6931, "step": 10275 }, { "epoch": 0.66, "grad_norm": 1.4770218325415068, "learning_rate": 2.769943785733441e-06, "loss": 0.6423, "step": 10276 }, { "epoch": 0.66, "grad_norm": 1.1886966007935547, "learning_rate": 2.76901608805914e-06, "loss": 0.6817, "step": 10277 }, { "epoch": 0.66, "grad_norm": 2.578366618575618, "learning_rate": 2.768088486267839e-06, "loss": 0.6876, "step": 10278 }, { "epoch": 0.66, "grad_norm": 1.5561538598674036, "learning_rate": 2.767160980399406e-06, "loss": 0.6632, "step": 10279 }, { "epoch": 0.66, "grad_norm": 1.4639207200512145, "learning_rate": 2.7662335704937015e-06, "loss": 0.6337, "step": 10280 }, { "epoch": 0.66, "grad_norm": 1.9258028775070508, "learning_rate": 2.7653062565905842e-06, "loss": 0.7263, "step": 10281 }, { "epoch": 0.66, "grad_norm": 1.3878526086372491, "learning_rate": 2.764379038729907e-06, "loss": 0.5619, "step": 10282 }, { "epoch": 0.66, "grad_norm": 1.5674029408418784, "learning_rate": 2.7634519169515237e-06, "loss": 0.6371, "step": 10283 }, { "epoch": 0.66, "grad_norm": 1.4263081178740427, "learning_rate": 2.7625248912952753e-06, "loss": 0.6708, "step": 10284 }, { "epoch": 0.66, "grad_norm": 2.605279834031519, "learning_rate": 2.7615979618010057e-06, "loss": 0.6732, "step": 10285 }, { "epoch": 0.66, "grad_norm": 1.410783162571774, "learning_rate": 2.760671128508553e-06, "loss": 0.625, "step": 10286 }, { "epoch": 0.66, "grad_norm": 1.5820940650830266, "learning_rate": 2.759744391457749e-06, "loss": 0.6565, "step": 10287 }, { "epoch": 0.66, "grad_norm": 1.533816140639556, "learning_rate": 2.758817750688425e-06, "loss": 0.6317, "step": 10288 }, { "epoch": 0.66, "grad_norm": 1.691792261255558, "learning_rate": 2.757891206240404e-06, "loss": 0.6954, "step": 10289 }, { "epoch": 0.66, "grad_norm": 1.363714187589867, "learning_rate": 2.7569647581535077e-06, "loss": 0.6954, "step": 10290 }, { "epoch": 0.66, "grad_norm": 1.877718878226622, "learning_rate": 2.7560384064675505e-06, "loss": 0.6537, "step": 10291 }, { "epoch": 0.66, "grad_norm": 1.0883470390250647, "learning_rate": 2.755112151222351e-06, "loss": 0.7429, "step": 10292 }, { "epoch": 0.66, "grad_norm": 1.9546768101996321, "learning_rate": 2.7541859924577106e-06, "loss": 0.6111, "step": 10293 }, { "epoch": 0.66, "grad_norm": 1.4346091321836645, "learning_rate": 2.753259930213442e-06, "loss": 0.6745, "step": 10294 }, { "epoch": 0.66, "grad_norm": 1.5191267672278244, "learning_rate": 2.7523339645293357e-06, "loss": 0.6926, "step": 10295 }, { "epoch": 0.66, "grad_norm": 1.4839809654171376, "learning_rate": 2.7514080954451944e-06, "loss": 0.7138, "step": 10296 }, { "epoch": 0.66, "grad_norm": 1.6213954191817546, "learning_rate": 2.750482323000808e-06, "loss": 0.5852, "step": 10297 }, { "epoch": 0.66, "grad_norm": 1.838919842523958, "learning_rate": 2.7495566472359638e-06, "loss": 0.6954, "step": 10298 }, { "epoch": 0.66, "grad_norm": 1.1878739882020843, "learning_rate": 2.748631068190449e-06, "loss": 0.5962, "step": 10299 }, { "epoch": 0.66, "grad_norm": 1.507533073129615, "learning_rate": 2.747705585904036e-06, "loss": 0.7157, "step": 10300 }, { "epoch": 0.66, "grad_norm": 1.4212662357533241, "learning_rate": 2.746780200416509e-06, "loss": 0.6557, "step": 10301 }, { "epoch": 0.66, "grad_norm": 1.0364326991242527, "learning_rate": 2.7458549117676294e-06, "loss": 0.5969, "step": 10302 }, { "epoch": 0.66, "grad_norm": 1.526861105965245, "learning_rate": 2.7449297199971713e-06, "loss": 0.6317, "step": 10303 }, { "epoch": 0.66, "grad_norm": 1.7397664645286188, "learning_rate": 2.744004625144896e-06, "loss": 0.6808, "step": 10304 }, { "epoch": 0.66, "grad_norm": 1.5046023878764376, "learning_rate": 2.743079627250559e-06, "loss": 0.5988, "step": 10305 }, { "epoch": 0.66, "grad_norm": 1.4448609659959142, "learning_rate": 2.7421547263539205e-06, "loss": 0.6378, "step": 10306 }, { "epoch": 0.66, "grad_norm": 1.615765093443191, "learning_rate": 2.741229922494724e-06, "loss": 0.6492, "step": 10307 }, { "epoch": 0.66, "grad_norm": 1.7238502768128658, "learning_rate": 2.740305215712723e-06, "loss": 0.6991, "step": 10308 }, { "epoch": 0.66, "grad_norm": 1.525384449350898, "learning_rate": 2.7393806060476525e-06, "loss": 0.6582, "step": 10309 }, { "epoch": 0.66, "grad_norm": 1.5614073362624126, "learning_rate": 2.738456093539254e-06, "loss": 0.6198, "step": 10310 }, { "epoch": 0.66, "grad_norm": 1.5813696549352871, "learning_rate": 2.737531678227261e-06, "loss": 0.6822, "step": 10311 }, { "epoch": 0.66, "grad_norm": 1.6270666746422526, "learning_rate": 2.736607360151402e-06, "loss": 0.6685, "step": 10312 }, { "epoch": 0.66, "grad_norm": 1.711644011699847, "learning_rate": 2.7356831393514033e-06, "loss": 0.6504, "step": 10313 }, { "epoch": 0.66, "grad_norm": 1.5681516706254874, "learning_rate": 2.734759015866983e-06, "loss": 0.7009, "step": 10314 }, { "epoch": 0.66, "grad_norm": 1.6881078085203876, "learning_rate": 2.733834989737864e-06, "loss": 0.6766, "step": 10315 }, { "epoch": 0.66, "grad_norm": 1.3877130676990987, "learning_rate": 2.732911061003752e-06, "loss": 0.6344, "step": 10316 }, { "epoch": 0.66, "grad_norm": 1.5322668457975541, "learning_rate": 2.7319872297043604e-06, "loss": 0.6309, "step": 10317 }, { "epoch": 0.66, "grad_norm": 1.517890589641351, "learning_rate": 2.7310634958793915e-06, "loss": 0.6101, "step": 10318 }, { "epoch": 0.66, "grad_norm": 1.414329438739804, "learning_rate": 2.7301398595685456e-06, "loss": 0.7407, "step": 10319 }, { "epoch": 0.66, "grad_norm": 1.4255833785939807, "learning_rate": 2.72921632081152e-06, "loss": 0.6045, "step": 10320 }, { "epoch": 0.66, "grad_norm": 2.4190493574842837, "learning_rate": 2.7282928796480047e-06, "loss": 0.6881, "step": 10321 }, { "epoch": 0.66, "grad_norm": 1.5199661172741943, "learning_rate": 2.7273695361176876e-06, "loss": 0.672, "step": 10322 }, { "epoch": 0.66, "grad_norm": 1.3860414811075137, "learning_rate": 2.726446290260251e-06, "loss": 0.6081, "step": 10323 }, { "epoch": 0.66, "grad_norm": 1.4166650766025208, "learning_rate": 2.725523142115377e-06, "loss": 0.6589, "step": 10324 }, { "epoch": 0.66, "grad_norm": 1.1754550848696972, "learning_rate": 2.72460009172274e-06, "loss": 0.6333, "step": 10325 }, { "epoch": 0.66, "grad_norm": 1.5467129666206756, "learning_rate": 2.7236771391220084e-06, "loss": 0.6614, "step": 10326 }, { "epoch": 0.66, "grad_norm": 1.5122436108735, "learning_rate": 2.722754284352851e-06, "loss": 0.6444, "step": 10327 }, { "epoch": 0.66, "grad_norm": 1.440055072281479, "learning_rate": 2.7218315274549296e-06, "loss": 0.6978, "step": 10328 }, { "epoch": 0.66, "grad_norm": 1.6518877307274793, "learning_rate": 2.7209088684679015e-06, "loss": 0.625, "step": 10329 }, { "epoch": 0.66, "grad_norm": 1.5136381168042035, "learning_rate": 2.7199863074314216e-06, "loss": 0.5928, "step": 10330 }, { "epoch": 0.66, "grad_norm": 1.0476296178224636, "learning_rate": 2.719063844385138e-06, "loss": 0.6173, "step": 10331 }, { "epoch": 0.66, "grad_norm": 1.594434997575818, "learning_rate": 2.7181414793686966e-06, "loss": 0.6385, "step": 10332 }, { "epoch": 0.66, "grad_norm": 1.3550558033519355, "learning_rate": 2.7172192124217418e-06, "loss": 0.6887, "step": 10333 }, { "epoch": 0.66, "grad_norm": 0.9978802568511469, "learning_rate": 2.7162970435839074e-06, "loss": 0.5982, "step": 10334 }, { "epoch": 0.66, "grad_norm": 1.076880683598875, "learning_rate": 2.7153749728948286e-06, "loss": 0.6754, "step": 10335 }, { "epoch": 0.66, "grad_norm": 1.5128337261044924, "learning_rate": 2.714453000394132e-06, "loss": 0.7225, "step": 10336 }, { "epoch": 0.66, "grad_norm": 2.421043077302836, "learning_rate": 2.7135311261214425e-06, "loss": 0.6444, "step": 10337 }, { "epoch": 0.66, "grad_norm": 1.3995563844254262, "learning_rate": 2.712609350116382e-06, "loss": 0.6639, "step": 10338 }, { "epoch": 0.66, "grad_norm": 1.0628784384238994, "learning_rate": 2.711687672418564e-06, "loss": 0.6805, "step": 10339 }, { "epoch": 0.66, "grad_norm": 1.6147759801502788, "learning_rate": 2.7107660930675992e-06, "loss": 0.7199, "step": 10340 }, { "epoch": 0.66, "grad_norm": 1.517368567955587, "learning_rate": 2.7098446121031008e-06, "loss": 0.6779, "step": 10341 }, { "epoch": 0.66, "grad_norm": 1.5139372093941543, "learning_rate": 2.708923229564667e-06, "loss": 0.6951, "step": 10342 }, { "epoch": 0.66, "grad_norm": 1.4552284464292342, "learning_rate": 2.7080019454919e-06, "loss": 0.6997, "step": 10343 }, { "epoch": 0.66, "grad_norm": 1.7830860186245128, "learning_rate": 2.7070807599243925e-06, "loss": 0.6763, "step": 10344 }, { "epoch": 0.66, "grad_norm": 1.5390731815608338, "learning_rate": 2.706159672901736e-06, "loss": 0.6119, "step": 10345 }, { "epoch": 0.66, "grad_norm": 1.4620337584497944, "learning_rate": 2.705238684463517e-06, "loss": 0.674, "step": 10346 }, { "epoch": 0.66, "grad_norm": 1.4979326868627223, "learning_rate": 2.7043177946493148e-06, "loss": 0.6152, "step": 10347 }, { "epoch": 0.66, "grad_norm": 1.3995323485406865, "learning_rate": 2.7033970034987144e-06, "loss": 0.6713, "step": 10348 }, { "epoch": 0.66, "grad_norm": 1.479969268243028, "learning_rate": 2.7024763110512808e-06, "loss": 0.6316, "step": 10349 }, { "epoch": 0.66, "grad_norm": 1.9128380387293382, "learning_rate": 2.7015557173465897e-06, "loss": 0.7683, "step": 10350 }, { "epoch": 0.66, "grad_norm": 1.5844721194610571, "learning_rate": 2.700635222424204e-06, "loss": 0.7314, "step": 10351 }, { "epoch": 0.66, "grad_norm": 1.4580278070222856, "learning_rate": 2.699714826323685e-06, "loss": 0.6908, "step": 10352 }, { "epoch": 0.66, "grad_norm": 1.0544170242111979, "learning_rate": 2.6987945290845884e-06, "loss": 0.6117, "step": 10353 }, { "epoch": 0.66, "grad_norm": 3.014994708410889, "learning_rate": 2.6978743307464663e-06, "loss": 0.6843, "step": 10354 }, { "epoch": 0.66, "grad_norm": 1.8887101781658868, "learning_rate": 2.6969542313488716e-06, "loss": 0.6613, "step": 10355 }, { "epoch": 0.66, "grad_norm": 1.2242049584521162, "learning_rate": 2.6960342309313415e-06, "loss": 0.7147, "step": 10356 }, { "epoch": 0.66, "grad_norm": 1.0474037474499192, "learning_rate": 2.695114329533422e-06, "loss": 0.6349, "step": 10357 }, { "epoch": 0.66, "grad_norm": 1.8539304816610922, "learning_rate": 2.694194527194641e-06, "loss": 0.7133, "step": 10358 }, { "epoch": 0.66, "grad_norm": 2.021040209928433, "learning_rate": 2.693274823954537e-06, "loss": 0.6499, "step": 10359 }, { "epoch": 0.66, "grad_norm": 1.115650514972409, "learning_rate": 2.6923552198526337e-06, "loss": 0.6462, "step": 10360 }, { "epoch": 0.66, "grad_norm": 1.5857728089169323, "learning_rate": 2.6914357149284522e-06, "loss": 0.6793, "step": 10361 }, { "epoch": 0.66, "grad_norm": 1.4314792290186271, "learning_rate": 2.690516309221517e-06, "loss": 0.6432, "step": 10362 }, { "epoch": 0.66, "grad_norm": 1.4520144178728953, "learning_rate": 2.689597002771333e-06, "loss": 0.6149, "step": 10363 }, { "epoch": 0.66, "grad_norm": 1.5477983454410122, "learning_rate": 2.688677795617419e-06, "loss": 0.6706, "step": 10364 }, { "epoch": 0.66, "grad_norm": 1.6454822427027245, "learning_rate": 2.6877586877992735e-06, "loss": 0.6852, "step": 10365 }, { "epoch": 0.66, "grad_norm": 1.4980024877419145, "learning_rate": 2.686839679356402e-06, "loss": 0.6412, "step": 10366 }, { "epoch": 0.66, "grad_norm": 1.4122060275901764, "learning_rate": 2.6859207703283012e-06, "loss": 0.6244, "step": 10367 }, { "epoch": 0.66, "grad_norm": 1.2950411299956286, "learning_rate": 2.685001960754463e-06, "loss": 0.65, "step": 10368 }, { "epoch": 0.66, "grad_norm": 2.0799074250123506, "learning_rate": 2.6840832506743757e-06, "loss": 0.6402, "step": 10369 }, { "epoch": 0.66, "grad_norm": 1.5909213672303237, "learning_rate": 2.6831646401275226e-06, "loss": 0.5776, "step": 10370 }, { "epoch": 0.66, "grad_norm": 1.4746162975186399, "learning_rate": 2.6822461291533874e-06, "loss": 0.7293, "step": 10371 }, { "epoch": 0.66, "grad_norm": 1.3409696278673913, "learning_rate": 2.681327717791441e-06, "loss": 0.601, "step": 10372 }, { "epoch": 0.66, "grad_norm": 1.557788961204115, "learning_rate": 2.6804094060811604e-06, "loss": 0.8156, "step": 10373 }, { "epoch": 0.66, "grad_norm": 1.0221231177602994, "learning_rate": 2.679491194062005e-06, "loss": 0.5454, "step": 10374 }, { "epoch": 0.66, "grad_norm": 1.565702278548683, "learning_rate": 2.6785730817734446e-06, "loss": 0.6784, "step": 10375 }, { "epoch": 0.66, "grad_norm": 1.5043868784788361, "learning_rate": 2.6776550692549346e-06, "loss": 0.6019, "step": 10376 }, { "epoch": 0.66, "grad_norm": 1.5191258056106909, "learning_rate": 2.67673715654593e-06, "loss": 0.6285, "step": 10377 }, { "epoch": 0.66, "grad_norm": 1.5478558104685065, "learning_rate": 2.675819343685881e-06, "loss": 0.6614, "step": 10378 }, { "epoch": 0.66, "grad_norm": 1.7274470302023235, "learning_rate": 2.6749016307142307e-06, "loss": 0.6314, "step": 10379 }, { "epoch": 0.66, "grad_norm": 1.5999088084430502, "learning_rate": 2.673984017670426e-06, "loss": 0.6492, "step": 10380 }, { "epoch": 0.66, "grad_norm": 1.5297382253127407, "learning_rate": 2.6730665045938975e-06, "loss": 0.7508, "step": 10381 }, { "epoch": 0.66, "grad_norm": 1.759312251897549, "learning_rate": 2.6721490915240823e-06, "loss": 0.6976, "step": 10382 }, { "epoch": 0.66, "grad_norm": 1.0428644148630808, "learning_rate": 2.671231778500408e-06, "loss": 0.6212, "step": 10383 }, { "epoch": 0.66, "grad_norm": 1.6770452452359967, "learning_rate": 2.670314565562298e-06, "loss": 0.6439, "step": 10384 }, { "epoch": 0.66, "grad_norm": 1.8828575242024064, "learning_rate": 2.669397452749173e-06, "loss": 0.7536, "step": 10385 }, { "epoch": 0.66, "grad_norm": 1.58222764727501, "learning_rate": 2.668480440100448e-06, "loss": 0.8352, "step": 10386 }, { "epoch": 0.66, "grad_norm": 1.1998683994813735, "learning_rate": 2.667563527655535e-06, "loss": 0.6829, "step": 10387 }, { "epoch": 0.66, "grad_norm": 1.451856948566841, "learning_rate": 2.6666467154538383e-06, "loss": 0.694, "step": 10388 }, { "epoch": 0.66, "grad_norm": 1.5540645463676321, "learning_rate": 2.6657300035347643e-06, "loss": 0.6695, "step": 10389 }, { "epoch": 0.67, "grad_norm": 1.2364870836007193, "learning_rate": 2.6648133919377106e-06, "loss": 0.6917, "step": 10390 }, { "epoch": 0.67, "grad_norm": 1.469290469885493, "learning_rate": 2.6638968807020693e-06, "loss": 0.6477, "step": 10391 }, { "epoch": 0.67, "grad_norm": 1.6780769969905132, "learning_rate": 2.6629804698672325e-06, "loss": 0.6983, "step": 10392 }, { "epoch": 0.67, "grad_norm": 1.234378198975289, "learning_rate": 2.6620641594725834e-06, "loss": 0.6316, "step": 10393 }, { "epoch": 0.67, "grad_norm": 1.729756825887103, "learning_rate": 2.6611479495575044e-06, "loss": 0.6384, "step": 10394 }, { "epoch": 0.67, "grad_norm": 1.0136614932135855, "learning_rate": 2.6602318401613715e-06, "loss": 0.5856, "step": 10395 }, { "epoch": 0.67, "grad_norm": 1.6357261483979246, "learning_rate": 2.6593158313235556e-06, "loss": 0.6157, "step": 10396 }, { "epoch": 0.67, "grad_norm": 1.537987015459933, "learning_rate": 2.6583999230834294e-06, "loss": 0.6812, "step": 10397 }, { "epoch": 0.67, "grad_norm": 1.1348659965321741, "learning_rate": 2.6574841154803546e-06, "loss": 0.5094, "step": 10398 }, { "epoch": 0.67, "grad_norm": 1.6905427544831229, "learning_rate": 2.656568408553689e-06, "loss": 0.6374, "step": 10399 }, { "epoch": 0.67, "grad_norm": 1.5773247923724167, "learning_rate": 2.655652802342789e-06, "loss": 0.6646, "step": 10400 }, { "epoch": 0.67, "grad_norm": 1.6058831877973574, "learning_rate": 2.654737296887006e-06, "loss": 0.7032, "step": 10401 }, { "epoch": 0.67, "grad_norm": 1.6846611871413901, "learning_rate": 2.6538218922256857e-06, "loss": 0.7011, "step": 10402 }, { "epoch": 0.67, "grad_norm": 1.6092757288978026, "learning_rate": 2.652906588398171e-06, "loss": 0.6397, "step": 10403 }, { "epoch": 0.67, "grad_norm": 1.3800792438884575, "learning_rate": 2.6519913854437984e-06, "loss": 0.6184, "step": 10404 }, { "epoch": 0.67, "grad_norm": 1.4139806508193107, "learning_rate": 2.6510762834019006e-06, "loss": 0.6669, "step": 10405 }, { "epoch": 0.67, "grad_norm": 1.5588112031501682, "learning_rate": 2.65016128231181e-06, "loss": 0.7193, "step": 10406 }, { "epoch": 0.67, "grad_norm": 1.5109588930574493, "learning_rate": 2.64924638221285e-06, "loss": 0.7615, "step": 10407 }, { "epoch": 0.67, "grad_norm": 1.6752886424468678, "learning_rate": 2.6483315831443412e-06, "loss": 0.6983, "step": 10408 }, { "epoch": 0.67, "grad_norm": 1.7017099190408753, "learning_rate": 2.6474168851455995e-06, "loss": 0.5536, "step": 10409 }, { "epoch": 0.67, "grad_norm": 1.803856530002006, "learning_rate": 2.646502288255936e-06, "loss": 0.7084, "step": 10410 }, { "epoch": 0.67, "grad_norm": 1.6294601793591537, "learning_rate": 2.645587792514659e-06, "loss": 0.6023, "step": 10411 }, { "epoch": 0.67, "grad_norm": 1.700799137379463, "learning_rate": 2.6446733979610696e-06, "loss": 0.7283, "step": 10412 }, { "epoch": 0.67, "grad_norm": 1.4277086770637324, "learning_rate": 2.6437591046344724e-06, "loss": 0.6956, "step": 10413 }, { "epoch": 0.67, "grad_norm": 1.6163170244304248, "learning_rate": 2.642844912574153e-06, "loss": 0.681, "step": 10414 }, { "epoch": 0.67, "grad_norm": 1.6410983302745334, "learning_rate": 2.641930821819409e-06, "loss": 0.6659, "step": 10415 }, { "epoch": 0.67, "grad_norm": 1.5105159166127713, "learning_rate": 2.641016832409523e-06, "loss": 0.6426, "step": 10416 }, { "epoch": 0.67, "grad_norm": 0.9558641950646433, "learning_rate": 2.6401029443837767e-06, "loss": 0.6343, "step": 10417 }, { "epoch": 0.67, "grad_norm": 1.678466252941847, "learning_rate": 2.6391891577814468e-06, "loss": 0.7239, "step": 10418 }, { "epoch": 0.67, "grad_norm": 1.5817428639098532, "learning_rate": 2.6382754726418047e-06, "loss": 0.6934, "step": 10419 }, { "epoch": 0.67, "grad_norm": 1.579006491541759, "learning_rate": 2.637361889004123e-06, "loss": 0.7284, "step": 10420 }, { "epoch": 0.67, "grad_norm": 1.6291349937348059, "learning_rate": 2.63644840690766e-06, "loss": 0.6809, "step": 10421 }, { "epoch": 0.67, "grad_norm": 1.702727388499616, "learning_rate": 2.6355350263916814e-06, "loss": 0.7369, "step": 10422 }, { "epoch": 0.67, "grad_norm": 1.6129375029198756, "learning_rate": 2.634621747495435e-06, "loss": 0.6193, "step": 10423 }, { "epoch": 0.67, "grad_norm": 1.4790613752366177, "learning_rate": 2.6337085702581776e-06, "loss": 0.6805, "step": 10424 }, { "epoch": 0.67, "grad_norm": 1.520373627310011, "learning_rate": 2.632795494719154e-06, "loss": 0.6465, "step": 10425 }, { "epoch": 0.67, "grad_norm": 1.5607615067051732, "learning_rate": 2.6318825209176025e-06, "loss": 0.619, "step": 10426 }, { "epoch": 0.67, "grad_norm": 1.7775367430577391, "learning_rate": 2.6309696488927692e-06, "loss": 0.7061, "step": 10427 }, { "epoch": 0.67, "grad_norm": 1.0728365537774252, "learning_rate": 2.630056878683878e-06, "loss": 0.6781, "step": 10428 }, { "epoch": 0.67, "grad_norm": 1.4414612474193307, "learning_rate": 2.6291442103301667e-06, "loss": 0.6623, "step": 10429 }, { "epoch": 0.67, "grad_norm": 1.5620948288748477, "learning_rate": 2.628231643870851e-06, "loss": 0.6706, "step": 10430 }, { "epoch": 0.67, "grad_norm": 1.703435262868299, "learning_rate": 2.6273191793451575e-06, "loss": 0.7422, "step": 10431 }, { "epoch": 0.67, "grad_norm": 1.7402346841082297, "learning_rate": 2.6264068167923003e-06, "loss": 0.7026, "step": 10432 }, { "epoch": 0.67, "grad_norm": 1.5854144079028298, "learning_rate": 2.6254945562514877e-06, "loss": 0.7397, "step": 10433 }, { "epoch": 0.67, "grad_norm": 1.7380151362671192, "learning_rate": 2.6245823977619343e-06, "loss": 0.6789, "step": 10434 }, { "epoch": 0.67, "grad_norm": 1.5584396408296743, "learning_rate": 2.623670341362834e-06, "loss": 0.5946, "step": 10435 }, { "epoch": 0.67, "grad_norm": 1.5287443469442392, "learning_rate": 2.6227583870933926e-06, "loss": 0.6895, "step": 10436 }, { "epoch": 0.67, "grad_norm": 1.6487757560489076, "learning_rate": 2.621846534992797e-06, "loss": 0.6581, "step": 10437 }, { "epoch": 0.67, "grad_norm": 1.253278433847494, "learning_rate": 2.620934785100242e-06, "loss": 0.5934, "step": 10438 }, { "epoch": 0.67, "grad_norm": 1.6276565025093606, "learning_rate": 2.62002313745491e-06, "loss": 0.6559, "step": 10439 }, { "epoch": 0.67, "grad_norm": 1.4292046654241155, "learning_rate": 2.619111592095984e-06, "loss": 0.6588, "step": 10440 }, { "epoch": 0.67, "grad_norm": 1.6019948889949769, "learning_rate": 2.6182001490626374e-06, "loss": 0.7725, "step": 10441 }, { "epoch": 0.67, "grad_norm": 1.6379818143227747, "learning_rate": 2.6172888083940444e-06, "loss": 0.7546, "step": 10442 }, { "epoch": 0.67, "grad_norm": 1.5241103032248668, "learning_rate": 2.6163775701293714e-06, "loss": 0.6493, "step": 10443 }, { "epoch": 0.67, "grad_norm": 1.0992131391342146, "learning_rate": 2.6154664343077798e-06, "loss": 0.6458, "step": 10444 }, { "epoch": 0.67, "grad_norm": 1.5436640303016909, "learning_rate": 2.6145554009684343e-06, "loss": 0.7506, "step": 10445 }, { "epoch": 0.67, "grad_norm": 1.6866331947249342, "learning_rate": 2.6136444701504813e-06, "loss": 0.685, "step": 10446 }, { "epoch": 0.67, "grad_norm": 1.712630458258357, "learning_rate": 2.612733641893076e-06, "loss": 0.6411, "step": 10447 }, { "epoch": 0.67, "grad_norm": 1.6857170614900345, "learning_rate": 2.611822916235363e-06, "loss": 0.6198, "step": 10448 }, { "epoch": 0.67, "grad_norm": 1.3584396510129995, "learning_rate": 2.610912293216483e-06, "loss": 0.6149, "step": 10449 }, { "epoch": 0.67, "grad_norm": 1.3924113517314345, "learning_rate": 2.6100017728755717e-06, "loss": 0.6567, "step": 10450 }, { "epoch": 0.67, "grad_norm": 1.6334220170742986, "learning_rate": 2.6090913552517626e-06, "loss": 0.6974, "step": 10451 }, { "epoch": 0.67, "grad_norm": 1.3128512072813134, "learning_rate": 2.608181040384183e-06, "loss": 0.5777, "step": 10452 }, { "epoch": 0.67, "grad_norm": 1.5119353102261703, "learning_rate": 2.6072708283119543e-06, "loss": 0.6525, "step": 10453 }, { "epoch": 0.67, "grad_norm": 1.7070470439694598, "learning_rate": 2.6063607190741997e-06, "loss": 0.6751, "step": 10454 }, { "epoch": 0.67, "grad_norm": 1.5780020034681355, "learning_rate": 2.6054507127100315e-06, "loss": 0.5972, "step": 10455 }, { "epoch": 0.67, "grad_norm": 1.5478018114112175, "learning_rate": 2.60454080925856e-06, "loss": 0.6675, "step": 10456 }, { "epoch": 0.67, "grad_norm": 1.6065466583003156, "learning_rate": 2.6036310087588913e-06, "loss": 0.7062, "step": 10457 }, { "epoch": 0.67, "grad_norm": 1.5964872050955357, "learning_rate": 2.602721311250126e-06, "loss": 0.6389, "step": 10458 }, { "epoch": 0.67, "grad_norm": 1.8328087287742574, "learning_rate": 2.601811716771362e-06, "loss": 0.6632, "step": 10459 }, { "epoch": 0.67, "grad_norm": 1.5308617718908666, "learning_rate": 2.60090222536169e-06, "loss": 0.6329, "step": 10460 }, { "epoch": 0.67, "grad_norm": 1.3782506163277055, "learning_rate": 2.599992837060198e-06, "loss": 0.6351, "step": 10461 }, { "epoch": 0.67, "grad_norm": 2.0538869246426934, "learning_rate": 2.5990835519059724e-06, "loss": 0.6675, "step": 10462 }, { "epoch": 0.67, "grad_norm": 1.451370830723234, "learning_rate": 2.598174369938091e-06, "loss": 0.658, "step": 10463 }, { "epoch": 0.67, "grad_norm": 1.599310611571564, "learning_rate": 2.5972652911956287e-06, "loss": 0.656, "step": 10464 }, { "epoch": 0.67, "grad_norm": 1.2871457817747034, "learning_rate": 2.596356315717654e-06, "loss": 0.5892, "step": 10465 }, { "epoch": 0.67, "grad_norm": 2.2434364214205567, "learning_rate": 2.5954474435432353e-06, "loss": 0.7158, "step": 10466 }, { "epoch": 0.67, "grad_norm": 1.0402409184579222, "learning_rate": 2.594538674711432e-06, "loss": 0.7298, "step": 10467 }, { "epoch": 0.67, "grad_norm": 1.482940814793126, "learning_rate": 2.593630009261301e-06, "loss": 0.6574, "step": 10468 }, { "epoch": 0.67, "grad_norm": 1.7127332705348557, "learning_rate": 2.5927214472318983e-06, "loss": 0.6748, "step": 10469 }, { "epoch": 0.67, "grad_norm": 1.4718693587778031, "learning_rate": 2.5918129886622667e-06, "loss": 0.6394, "step": 10470 }, { "epoch": 0.67, "grad_norm": 1.5459243997501402, "learning_rate": 2.5909046335914533e-06, "loss": 0.6699, "step": 10471 }, { "epoch": 0.67, "grad_norm": 1.4487229845198637, "learning_rate": 2.5899963820584973e-06, "loss": 0.6502, "step": 10472 }, { "epoch": 0.67, "grad_norm": 1.498243089011029, "learning_rate": 2.5890882341024328e-06, "loss": 0.688, "step": 10473 }, { "epoch": 0.67, "grad_norm": 1.5980918197783691, "learning_rate": 2.5881801897622895e-06, "loss": 0.6813, "step": 10474 }, { "epoch": 0.67, "grad_norm": 1.3986510515091262, "learning_rate": 2.5872722490770917e-06, "loss": 0.6306, "step": 10475 }, { "epoch": 0.67, "grad_norm": 1.8281397841545453, "learning_rate": 2.5863644120858675e-06, "loss": 0.6901, "step": 10476 }, { "epoch": 0.67, "grad_norm": 1.4395627338603652, "learning_rate": 2.5854566788276247e-06, "loss": 0.7015, "step": 10477 }, { "epoch": 0.67, "grad_norm": 1.5675965192488093, "learning_rate": 2.584549049341384e-06, "loss": 0.6545, "step": 10478 }, { "epoch": 0.67, "grad_norm": 1.6839778015134996, "learning_rate": 2.583641523666146e-06, "loss": 0.697, "step": 10479 }, { "epoch": 0.67, "grad_norm": 1.6534721537134716, "learning_rate": 2.5827341018409197e-06, "loss": 0.7352, "step": 10480 }, { "epoch": 0.67, "grad_norm": 1.50625540941537, "learning_rate": 2.5818267839047018e-06, "loss": 0.7545, "step": 10481 }, { "epoch": 0.67, "grad_norm": 1.5718989209522, "learning_rate": 2.5809195698964885e-06, "loss": 0.6354, "step": 10482 }, { "epoch": 0.67, "grad_norm": 1.500977495304791, "learning_rate": 2.5800124598552676e-06, "loss": 0.6277, "step": 10483 }, { "epoch": 0.67, "grad_norm": 1.5249010619136232, "learning_rate": 2.5791054538200255e-06, "loss": 0.7115, "step": 10484 }, { "epoch": 0.67, "grad_norm": 1.5175977622349344, "learning_rate": 2.5781985518297468e-06, "loss": 0.6697, "step": 10485 }, { "epoch": 0.67, "grad_norm": 1.5927501679012472, "learning_rate": 2.577291753923402e-06, "loss": 0.6779, "step": 10486 }, { "epoch": 0.67, "grad_norm": 1.6103031980845768, "learning_rate": 2.576385060139971e-06, "loss": 0.6678, "step": 10487 }, { "epoch": 0.67, "grad_norm": 1.4773422384678228, "learning_rate": 2.575478470518413e-06, "loss": 0.7766, "step": 10488 }, { "epoch": 0.67, "grad_norm": 1.5248098883889893, "learning_rate": 2.574571985097698e-06, "loss": 0.6447, "step": 10489 }, { "epoch": 0.67, "grad_norm": 1.4830285686325935, "learning_rate": 2.5736656039167817e-06, "loss": 0.7625, "step": 10490 }, { "epoch": 0.67, "grad_norm": 1.4108909893758044, "learning_rate": 2.572759327014618e-06, "loss": 0.6029, "step": 10491 }, { "epoch": 0.67, "grad_norm": 1.5853649037574689, "learning_rate": 2.5718531544301623e-06, "loss": 0.7218, "step": 10492 }, { "epoch": 0.67, "grad_norm": 1.556326576826344, "learning_rate": 2.5709470862023517e-06, "loss": 0.6594, "step": 10493 }, { "epoch": 0.67, "grad_norm": 1.4313974093351305, "learning_rate": 2.570041122370135e-06, "loss": 0.6459, "step": 10494 }, { "epoch": 0.67, "grad_norm": 1.6087709147482225, "learning_rate": 2.5691352629724407e-06, "loss": 0.668, "step": 10495 }, { "epoch": 0.67, "grad_norm": 1.63991992985727, "learning_rate": 2.5682295080482073e-06, "loss": 0.6979, "step": 10496 }, { "epoch": 0.67, "grad_norm": 1.6551245790696583, "learning_rate": 2.567323857636359e-06, "loss": 0.8359, "step": 10497 }, { "epoch": 0.67, "grad_norm": 1.548030392004764, "learning_rate": 2.5664183117758184e-06, "loss": 0.6908, "step": 10498 }, { "epoch": 0.67, "grad_norm": 1.5342040266187498, "learning_rate": 2.565512870505509e-06, "loss": 0.6475, "step": 10499 }, { "epoch": 0.67, "grad_norm": 2.0788564213420595, "learning_rate": 2.5646075338643362e-06, "loss": 0.7, "step": 10500 }, { "epoch": 0.67, "grad_norm": 1.6322773553751682, "learning_rate": 2.5637023018912187e-06, "loss": 0.6723, "step": 10501 }, { "epoch": 0.67, "grad_norm": 1.4260686683565602, "learning_rate": 2.5627971746250523e-06, "loss": 0.6386, "step": 10502 }, { "epoch": 0.67, "grad_norm": 1.4561767866760058, "learning_rate": 2.561892152104745e-06, "loss": 0.7002, "step": 10503 }, { "epoch": 0.67, "grad_norm": 1.579455137582007, "learning_rate": 2.560987234369189e-06, "loss": 0.7549, "step": 10504 }, { "epoch": 0.67, "grad_norm": 1.539559059813408, "learning_rate": 2.5600824214572774e-06, "loss": 0.6111, "step": 10505 }, { "epoch": 0.67, "grad_norm": 1.4602728100880022, "learning_rate": 2.5591777134078954e-06, "loss": 0.6874, "step": 10506 }, { "epoch": 0.67, "grad_norm": 1.4989053724316903, "learning_rate": 2.558273110259925e-06, "loss": 0.5857, "step": 10507 }, { "epoch": 0.67, "grad_norm": 1.54883498124568, "learning_rate": 2.5573686120522496e-06, "loss": 0.6118, "step": 10508 }, { "epoch": 0.67, "grad_norm": 1.2606742179112795, "learning_rate": 2.5564642188237345e-06, "loss": 0.6162, "step": 10509 }, { "epoch": 0.67, "grad_norm": 1.014954080701957, "learning_rate": 2.5555599306132547e-06, "loss": 0.6481, "step": 10510 }, { "epoch": 0.67, "grad_norm": 1.8343235494869685, "learning_rate": 2.5546557474596725e-06, "loss": 0.6222, "step": 10511 }, { "epoch": 0.67, "grad_norm": 1.6699627933605032, "learning_rate": 2.5537516694018476e-06, "loss": 0.6757, "step": 10512 }, { "epoch": 0.67, "grad_norm": 1.5931219546450812, "learning_rate": 2.5528476964786365e-06, "loss": 0.653, "step": 10513 }, { "epoch": 0.67, "grad_norm": 1.5691709867443349, "learning_rate": 2.551943828728888e-06, "loss": 0.6735, "step": 10514 }, { "epoch": 0.67, "grad_norm": 1.4401482175797278, "learning_rate": 2.5510400661914503e-06, "loss": 0.6998, "step": 10515 }, { "epoch": 0.67, "grad_norm": 1.3145215932275096, "learning_rate": 2.550136408905165e-06, "loss": 0.6, "step": 10516 }, { "epoch": 0.67, "grad_norm": 1.484566124952464, "learning_rate": 2.5492328569088666e-06, "loss": 0.6261, "step": 10517 }, { "epoch": 0.67, "grad_norm": 1.359491279595494, "learning_rate": 2.5483294102413925e-06, "loss": 0.583, "step": 10518 }, { "epoch": 0.67, "grad_norm": 1.8137324370007673, "learning_rate": 2.5474260689415687e-06, "loss": 0.7569, "step": 10519 }, { "epoch": 0.67, "grad_norm": 1.8312485408569343, "learning_rate": 2.5465228330482184e-06, "loss": 0.7198, "step": 10520 }, { "epoch": 0.67, "grad_norm": 1.6684115413726015, "learning_rate": 2.5456197026001613e-06, "loss": 0.6953, "step": 10521 }, { "epoch": 0.67, "grad_norm": 1.772514842756853, "learning_rate": 2.544716677636212e-06, "loss": 0.7265, "step": 10522 }, { "epoch": 0.67, "grad_norm": 1.79155263554456, "learning_rate": 2.543813758195181e-06, "loss": 0.634, "step": 10523 }, { "epoch": 0.67, "grad_norm": 1.57278060107647, "learning_rate": 2.5429109443158735e-06, "loss": 0.6864, "step": 10524 }, { "epoch": 0.67, "grad_norm": 1.1230356639648937, "learning_rate": 2.54200823603709e-06, "loss": 0.6469, "step": 10525 }, { "epoch": 0.67, "grad_norm": 1.8308328144941428, "learning_rate": 2.5411056333976266e-06, "loss": 0.6285, "step": 10526 }, { "epoch": 0.67, "grad_norm": 1.4269606297220707, "learning_rate": 2.5402031364362772e-06, "loss": 0.5549, "step": 10527 }, { "epoch": 0.67, "grad_norm": 1.6358638378723336, "learning_rate": 2.539300745191829e-06, "loss": 0.707, "step": 10528 }, { "epoch": 0.67, "grad_norm": 1.8254224167286195, "learning_rate": 2.5383984597030638e-06, "loss": 0.7088, "step": 10529 }, { "epoch": 0.67, "grad_norm": 1.459934789295393, "learning_rate": 2.53749628000876e-06, "loss": 0.714, "step": 10530 }, { "epoch": 0.67, "grad_norm": 1.5841086133070377, "learning_rate": 2.5365942061476924e-06, "loss": 0.6096, "step": 10531 }, { "epoch": 0.67, "grad_norm": 1.5705293685713575, "learning_rate": 2.5356922381586287e-06, "loss": 0.6478, "step": 10532 }, { "epoch": 0.67, "grad_norm": 1.728936656398597, "learning_rate": 2.5347903760803327e-06, "loss": 0.711, "step": 10533 }, { "epoch": 0.67, "grad_norm": 1.658405699064266, "learning_rate": 2.53388861995157e-06, "loss": 0.7696, "step": 10534 }, { "epoch": 0.67, "grad_norm": 1.6778122144716585, "learning_rate": 2.532986969811089e-06, "loss": 0.7241, "step": 10535 }, { "epoch": 0.67, "grad_norm": 1.6683329353132121, "learning_rate": 2.532085425697646e-06, "loss": 0.7976, "step": 10536 }, { "epoch": 0.67, "grad_norm": 1.502770690997373, "learning_rate": 2.5311839876499843e-06, "loss": 0.7662, "step": 10537 }, { "epoch": 0.67, "grad_norm": 1.6112192676034007, "learning_rate": 2.530282655706848e-06, "loss": 0.7102, "step": 10538 }, { "epoch": 0.67, "grad_norm": 1.1237078607658761, "learning_rate": 2.5293814299069742e-06, "loss": 0.6213, "step": 10539 }, { "epoch": 0.67, "grad_norm": 1.5069988712688749, "learning_rate": 2.528480310289093e-06, "loss": 0.6628, "step": 10540 }, { "epoch": 0.67, "grad_norm": 1.4865430435195264, "learning_rate": 2.5275792968919376e-06, "loss": 0.6733, "step": 10541 }, { "epoch": 0.67, "grad_norm": 1.619563401065116, "learning_rate": 2.526678389754226e-06, "loss": 0.5873, "step": 10542 }, { "epoch": 0.67, "grad_norm": 1.891059841844495, "learning_rate": 2.525777588914683e-06, "loss": 0.6599, "step": 10543 }, { "epoch": 0.67, "grad_norm": 1.7015168395202613, "learning_rate": 2.524876894412017e-06, "loss": 0.5995, "step": 10544 }, { "epoch": 0.67, "grad_norm": 1.647137718283026, "learning_rate": 2.5239763062849424e-06, "loss": 0.6126, "step": 10545 }, { "epoch": 0.68, "grad_norm": 1.7321308955223285, "learning_rate": 2.5230758245721636e-06, "loss": 0.7081, "step": 10546 }, { "epoch": 0.68, "grad_norm": 1.475191740619257, "learning_rate": 2.5221754493123798e-06, "loss": 0.5955, "step": 10547 }, { "epoch": 0.68, "grad_norm": 1.476538332065343, "learning_rate": 2.5212751805442916e-06, "loss": 0.6254, "step": 10548 }, { "epoch": 0.68, "grad_norm": 2.5297186266947937, "learning_rate": 2.520375018306584e-06, "loss": 0.6664, "step": 10549 }, { "epoch": 0.68, "grad_norm": 1.7419871057820522, "learning_rate": 2.5194749626379515e-06, "loss": 0.6008, "step": 10550 }, { "epoch": 0.68, "grad_norm": 1.6387886486061147, "learning_rate": 2.5185750135770694e-06, "loss": 0.7451, "step": 10551 }, { "epoch": 0.68, "grad_norm": 1.7466249320653662, "learning_rate": 2.51767517116262e-06, "loss": 0.6676, "step": 10552 }, { "epoch": 0.68, "grad_norm": 1.5231552077339727, "learning_rate": 2.516775435433276e-06, "loss": 0.6347, "step": 10553 }, { "epoch": 0.68, "grad_norm": 1.6403396924898452, "learning_rate": 2.5158758064277068e-06, "loss": 0.6636, "step": 10554 }, { "epoch": 0.68, "grad_norm": 1.7055104036346727, "learning_rate": 2.514976284184575e-06, "loss": 0.6202, "step": 10555 }, { "epoch": 0.68, "grad_norm": 1.6397430076509656, "learning_rate": 2.514076868742538e-06, "loss": 0.6483, "step": 10556 }, { "epoch": 0.68, "grad_norm": 1.4056012621391636, "learning_rate": 2.5131775601402588e-06, "loss": 0.6285, "step": 10557 }, { "epoch": 0.68, "grad_norm": 1.693364042751427, "learning_rate": 2.5122783584163774e-06, "loss": 0.7205, "step": 10558 }, { "epoch": 0.68, "grad_norm": 1.6659632901927979, "learning_rate": 2.511379263609547e-06, "loss": 0.7527, "step": 10559 }, { "epoch": 0.68, "grad_norm": 1.6582361874285583, "learning_rate": 2.5104802757584067e-06, "loss": 0.7379, "step": 10560 }, { "epoch": 0.68, "grad_norm": 1.6759872872411168, "learning_rate": 2.5095813949015934e-06, "loss": 0.6224, "step": 10561 }, { "epoch": 0.68, "grad_norm": 1.494457906574902, "learning_rate": 2.5086826210777383e-06, "loss": 0.672, "step": 10562 }, { "epoch": 0.68, "grad_norm": 1.527700275113762, "learning_rate": 2.507783954325467e-06, "loss": 0.6755, "step": 10563 }, { "epoch": 0.68, "grad_norm": 1.6817598785356922, "learning_rate": 2.506885394683408e-06, "loss": 0.6618, "step": 10564 }, { "epoch": 0.68, "grad_norm": 1.4070354145331052, "learning_rate": 2.505986942190173e-06, "loss": 0.7637, "step": 10565 }, { "epoch": 0.68, "grad_norm": 1.502579711278537, "learning_rate": 2.5050885968843817e-06, "loss": 0.6781, "step": 10566 }, { "epoch": 0.68, "grad_norm": 1.9707473637772939, "learning_rate": 2.504190358804637e-06, "loss": 0.7398, "step": 10567 }, { "epoch": 0.68, "grad_norm": 1.3163052849117083, "learning_rate": 2.5032922279895476e-06, "loss": 0.6748, "step": 10568 }, { "epoch": 0.68, "grad_norm": 1.323609131058778, "learning_rate": 2.5023942044777114e-06, "loss": 0.5552, "step": 10569 }, { "epoch": 0.68, "grad_norm": 1.4945924007128044, "learning_rate": 2.5014962883077256e-06, "loss": 0.6251, "step": 10570 }, { "epoch": 0.68, "grad_norm": 1.1215897331721345, "learning_rate": 2.5005984795181783e-06, "loss": 0.6905, "step": 10571 }, { "epoch": 0.68, "grad_norm": 1.481231947081047, "learning_rate": 2.499700778147654e-06, "loss": 0.6537, "step": 10572 }, { "epoch": 0.68, "grad_norm": 1.6939795686110906, "learning_rate": 2.498803184234741e-06, "loss": 0.6644, "step": 10573 }, { "epoch": 0.68, "grad_norm": 1.5070165841818954, "learning_rate": 2.4979056978180073e-06, "loss": 0.6432, "step": 10574 }, { "epoch": 0.68, "grad_norm": 0.9763486741690972, "learning_rate": 2.497008318936031e-06, "loss": 0.6115, "step": 10575 }, { "epoch": 0.68, "grad_norm": 2.0363004474342303, "learning_rate": 2.4961110476273775e-06, "loss": 0.6166, "step": 10576 }, { "epoch": 0.68, "grad_norm": 1.1313592625793971, "learning_rate": 2.49521388393061e-06, "loss": 0.6268, "step": 10577 }, { "epoch": 0.68, "grad_norm": 1.3652923474092054, "learning_rate": 2.4943168278842865e-06, "loss": 0.7126, "step": 10578 }, { "epoch": 0.68, "grad_norm": 1.1162919929531219, "learning_rate": 2.4934198795269603e-06, "loss": 0.6392, "step": 10579 }, { "epoch": 0.68, "grad_norm": 1.4993989802233676, "learning_rate": 2.492523038897181e-06, "loss": 0.6875, "step": 10580 }, { "epoch": 0.68, "grad_norm": 1.1755877170669644, "learning_rate": 2.4916263060334903e-06, "loss": 0.6135, "step": 10581 }, { "epoch": 0.68, "grad_norm": 1.5329541845556898, "learning_rate": 2.4907296809744324e-06, "loss": 0.7626, "step": 10582 }, { "epoch": 0.68, "grad_norm": 1.0377996334321298, "learning_rate": 2.48983316375854e-06, "loss": 0.6383, "step": 10583 }, { "epoch": 0.68, "grad_norm": 1.7678117810515812, "learning_rate": 2.4889367544243437e-06, "loss": 0.6339, "step": 10584 }, { "epoch": 0.68, "grad_norm": 1.5355773512024224, "learning_rate": 2.488040453010369e-06, "loss": 0.6617, "step": 10585 }, { "epoch": 0.68, "grad_norm": 1.6628355624406779, "learning_rate": 2.4871442595551378e-06, "loss": 0.7693, "step": 10586 }, { "epoch": 0.68, "grad_norm": 1.5136677559379994, "learning_rate": 2.486248174097165e-06, "loss": 0.6616, "step": 10587 }, { "epoch": 0.68, "grad_norm": 1.0226880173974857, "learning_rate": 2.4853521966749643e-06, "loss": 0.6659, "step": 10588 }, { "epoch": 0.68, "grad_norm": 1.5229135543412935, "learning_rate": 2.4844563273270404e-06, "loss": 0.699, "step": 10589 }, { "epoch": 0.68, "grad_norm": 1.4800100656571935, "learning_rate": 2.483560566091901e-06, "loss": 0.6519, "step": 10590 }, { "epoch": 0.68, "grad_norm": 1.4822196713182287, "learning_rate": 2.482664913008037e-06, "loss": 0.6839, "step": 10591 }, { "epoch": 0.68, "grad_norm": 1.7135744682209884, "learning_rate": 2.4817693681139475e-06, "loss": 0.6562, "step": 10592 }, { "epoch": 0.68, "grad_norm": 1.0823936350271424, "learning_rate": 2.4808739314481196e-06, "loss": 0.6794, "step": 10593 }, { "epoch": 0.68, "grad_norm": 1.6442178847824107, "learning_rate": 2.479978603049036e-06, "loss": 0.6275, "step": 10594 }, { "epoch": 0.68, "grad_norm": 1.475810197713273, "learning_rate": 2.4790833829551764e-06, "loss": 0.6307, "step": 10595 }, { "epoch": 0.68, "grad_norm": 1.4709298972446256, "learning_rate": 2.4781882712050164e-06, "loss": 0.7404, "step": 10596 }, { "epoch": 0.68, "grad_norm": 1.612999217703421, "learning_rate": 2.4772932678370255e-06, "loss": 0.6837, "step": 10597 }, { "epoch": 0.68, "grad_norm": 1.5984324389705136, "learning_rate": 2.476398372889667e-06, "loss": 0.6471, "step": 10598 }, { "epoch": 0.68, "grad_norm": 1.4152286555057396, "learning_rate": 2.475503586401407e-06, "loss": 0.5806, "step": 10599 }, { "epoch": 0.68, "grad_norm": 1.4665738791091427, "learning_rate": 2.474608908410695e-06, "loss": 0.6178, "step": 10600 }, { "epoch": 0.68, "grad_norm": 1.719645377755975, "learning_rate": 2.473714338955987e-06, "loss": 0.7738, "step": 10601 }, { "epoch": 0.68, "grad_norm": 1.6686196990410178, "learning_rate": 2.4728198780757283e-06, "loss": 0.6376, "step": 10602 }, { "epoch": 0.68, "grad_norm": 1.692195125683931, "learning_rate": 2.4719255258083614e-06, "loss": 0.5897, "step": 10603 }, { "epoch": 0.68, "grad_norm": 1.3977483636493562, "learning_rate": 2.4710312821923225e-06, "loss": 0.6047, "step": 10604 }, { "epoch": 0.68, "grad_norm": 1.4809013269599824, "learning_rate": 2.4701371472660433e-06, "loss": 0.6995, "step": 10605 }, { "epoch": 0.68, "grad_norm": 1.6087623376682514, "learning_rate": 2.4692431210679575e-06, "loss": 0.6008, "step": 10606 }, { "epoch": 0.68, "grad_norm": 1.7863856768332989, "learning_rate": 2.4683492036364805e-06, "loss": 0.7234, "step": 10607 }, { "epoch": 0.68, "grad_norm": 1.7542776747339117, "learning_rate": 2.4674553950100388e-06, "loss": 0.6965, "step": 10608 }, { "epoch": 0.68, "grad_norm": 2.801046777924743, "learning_rate": 2.466561695227039e-06, "loss": 0.615, "step": 10609 }, { "epoch": 0.68, "grad_norm": 2.0269187178152843, "learning_rate": 2.465668104325896e-06, "loss": 0.5171, "step": 10610 }, { "epoch": 0.68, "grad_norm": 1.3591739690527782, "learning_rate": 2.4647746223450123e-06, "loss": 0.6529, "step": 10611 }, { "epoch": 0.68, "grad_norm": 1.64975274050896, "learning_rate": 2.4638812493227864e-06, "loss": 0.6778, "step": 10612 }, { "epoch": 0.68, "grad_norm": 1.5241111959095344, "learning_rate": 2.462987985297619e-06, "loss": 0.7016, "step": 10613 }, { "epoch": 0.68, "grad_norm": 1.3360630141793122, "learning_rate": 2.462094830307893e-06, "loss": 0.6665, "step": 10614 }, { "epoch": 0.68, "grad_norm": 1.465469386985566, "learning_rate": 2.461201784392002e-06, "loss": 0.5975, "step": 10615 }, { "epoch": 0.68, "grad_norm": 1.0873510213215718, "learning_rate": 2.4603088475883203e-06, "loss": 0.6419, "step": 10616 }, { "epoch": 0.68, "grad_norm": 1.4317551447634111, "learning_rate": 2.459416019935229e-06, "loss": 0.7067, "step": 10617 }, { "epoch": 0.68, "grad_norm": 1.609705600580753, "learning_rate": 2.458523301471099e-06, "loss": 0.6367, "step": 10618 }, { "epoch": 0.68, "grad_norm": 1.4643560176497548, "learning_rate": 2.4576306922342956e-06, "loss": 0.7065, "step": 10619 }, { "epoch": 0.68, "grad_norm": 1.6176024759703054, "learning_rate": 2.456738192263186e-06, "loss": 0.5837, "step": 10620 }, { "epoch": 0.68, "grad_norm": 1.0001909262042894, "learning_rate": 2.4558458015961213e-06, "loss": 0.4798, "step": 10621 }, { "epoch": 0.68, "grad_norm": 1.3943938516660748, "learning_rate": 2.454953520271462e-06, "loss": 0.6186, "step": 10622 }, { "epoch": 0.68, "grad_norm": 1.735893021411203, "learning_rate": 2.4540613483275484e-06, "loss": 0.6, "step": 10623 }, { "epoch": 0.68, "grad_norm": 1.6723371679048087, "learning_rate": 2.45316928580273e-06, "loss": 0.6969, "step": 10624 }, { "epoch": 0.68, "grad_norm": 1.824616301290904, "learning_rate": 2.4522773327353446e-06, "loss": 0.5949, "step": 10625 }, { "epoch": 0.68, "grad_norm": 1.5650615197377098, "learning_rate": 2.4513854891637253e-06, "loss": 0.7682, "step": 10626 }, { "epoch": 0.68, "grad_norm": 1.4903652405969656, "learning_rate": 2.4504937551262024e-06, "loss": 0.7048, "step": 10627 }, { "epoch": 0.68, "grad_norm": 1.3609995515021163, "learning_rate": 2.4496021306610996e-06, "loss": 0.6654, "step": 10628 }, { "epoch": 0.68, "grad_norm": 1.7992237540102145, "learning_rate": 2.448710615806741e-06, "loss": 0.6663, "step": 10629 }, { "epoch": 0.68, "grad_norm": 1.6192183041656274, "learning_rate": 2.4478192106014354e-06, "loss": 0.6557, "step": 10630 }, { "epoch": 0.68, "grad_norm": 1.153353642905899, "learning_rate": 2.4469279150834996e-06, "loss": 0.6325, "step": 10631 }, { "epoch": 0.68, "grad_norm": 1.497166106641406, "learning_rate": 2.4460367292912367e-06, "loss": 0.6719, "step": 10632 }, { "epoch": 0.68, "grad_norm": 1.0334650525044706, "learning_rate": 2.445145653262949e-06, "loss": 0.7032, "step": 10633 }, { "epoch": 0.68, "grad_norm": 1.4326579957999173, "learning_rate": 2.444254687036933e-06, "loss": 0.588, "step": 10634 }, { "epoch": 0.68, "grad_norm": 1.1564997185101686, "learning_rate": 2.4433638306514794e-06, "loss": 0.6461, "step": 10635 }, { "epoch": 0.68, "grad_norm": 1.5375141247303294, "learning_rate": 2.4424730841448766e-06, "loss": 0.6628, "step": 10636 }, { "epoch": 0.68, "grad_norm": 1.6711021621039062, "learning_rate": 2.4415824475554046e-06, "loss": 0.7305, "step": 10637 }, { "epoch": 0.68, "grad_norm": 1.7213370443618485, "learning_rate": 2.4406919209213466e-06, "loss": 0.7006, "step": 10638 }, { "epoch": 0.68, "grad_norm": 1.3368409462542847, "learning_rate": 2.439801504280968e-06, "loss": 0.7609, "step": 10639 }, { "epoch": 0.68, "grad_norm": 1.4240212007194633, "learning_rate": 2.4389111976725437e-06, "loss": 0.6891, "step": 10640 }, { "epoch": 0.68, "grad_norm": 1.5645996766319685, "learning_rate": 2.438021001134334e-06, "loss": 0.6875, "step": 10641 }, { "epoch": 0.68, "grad_norm": 1.4544408875349504, "learning_rate": 2.437130914704598e-06, "loss": 0.5942, "step": 10642 }, { "epoch": 0.68, "grad_norm": 1.7718010749889073, "learning_rate": 2.436240938421591e-06, "loss": 0.7267, "step": 10643 }, { "epoch": 0.68, "grad_norm": 1.5933966614858737, "learning_rate": 2.43535107232356e-06, "loss": 0.642, "step": 10644 }, { "epoch": 0.68, "grad_norm": 1.0700372683090658, "learning_rate": 2.4344613164487513e-06, "loss": 0.6478, "step": 10645 }, { "epoch": 0.68, "grad_norm": 1.1388601048460947, "learning_rate": 2.433571670835402e-06, "loss": 0.6868, "step": 10646 }, { "epoch": 0.68, "grad_norm": 1.5567978088320054, "learning_rate": 2.432682135521752e-06, "loss": 0.7542, "step": 10647 }, { "epoch": 0.68, "grad_norm": 1.714110085212153, "learning_rate": 2.4317927105460294e-06, "loss": 0.6192, "step": 10648 }, { "epoch": 0.68, "grad_norm": 1.5862612240037666, "learning_rate": 2.430903395946459e-06, "loss": 0.6368, "step": 10649 }, { "epoch": 0.68, "grad_norm": 1.458994114075265, "learning_rate": 2.4300141917612623e-06, "loss": 0.6295, "step": 10650 }, { "epoch": 0.68, "grad_norm": 1.6277837057577529, "learning_rate": 2.429125098028655e-06, "loss": 0.69, "step": 10651 }, { "epoch": 0.68, "grad_norm": 1.5852021057173988, "learning_rate": 2.4282361147868494e-06, "loss": 0.7089, "step": 10652 }, { "epoch": 0.68, "grad_norm": 1.4901003753122215, "learning_rate": 2.427347242074051e-06, "loss": 0.6888, "step": 10653 }, { "epoch": 0.68, "grad_norm": 1.8219742874121791, "learning_rate": 2.4264584799284614e-06, "loss": 0.6025, "step": 10654 }, { "epoch": 0.68, "grad_norm": 1.4845381263690527, "learning_rate": 2.4255698283882794e-06, "loss": 0.5899, "step": 10655 }, { "epoch": 0.68, "grad_norm": 1.0279326452174475, "learning_rate": 2.424681287491697e-06, "loss": 0.5794, "step": 10656 }, { "epoch": 0.68, "grad_norm": 1.4505869010537653, "learning_rate": 2.4237928572769016e-06, "loss": 0.6389, "step": 10657 }, { "epoch": 0.68, "grad_norm": 1.579921261970412, "learning_rate": 2.4229045377820752e-06, "loss": 0.6332, "step": 10658 }, { "epoch": 0.68, "grad_norm": 1.507404828263769, "learning_rate": 2.4220163290453973e-06, "loss": 0.5973, "step": 10659 }, { "epoch": 0.68, "grad_norm": 1.4265054893158386, "learning_rate": 2.4211282311050397e-06, "loss": 0.6538, "step": 10660 }, { "epoch": 0.68, "grad_norm": 1.5318759353483489, "learning_rate": 2.4202402439991708e-06, "loss": 0.6897, "step": 10661 }, { "epoch": 0.68, "grad_norm": 1.5295277425648983, "learning_rate": 2.4193523677659585e-06, "loss": 0.6879, "step": 10662 }, { "epoch": 0.68, "grad_norm": 1.477172527187159, "learning_rate": 2.4184646024435547e-06, "loss": 0.6191, "step": 10663 }, { "epoch": 0.68, "grad_norm": 1.6135793528008717, "learning_rate": 2.4175769480701222e-06, "loss": 0.6968, "step": 10664 }, { "epoch": 0.68, "grad_norm": 1.6114958499224104, "learning_rate": 2.416689404683802e-06, "loss": 0.6753, "step": 10665 }, { "epoch": 0.68, "grad_norm": 1.5133005237873538, "learning_rate": 2.4158019723227447e-06, "loss": 0.666, "step": 10666 }, { "epoch": 0.68, "grad_norm": 1.4120020227499546, "learning_rate": 2.4149146510250885e-06, "loss": 0.6823, "step": 10667 }, { "epoch": 0.68, "grad_norm": 1.5292074588928013, "learning_rate": 2.414027440828967e-06, "loss": 0.7623, "step": 10668 }, { "epoch": 0.68, "grad_norm": 1.6124579172289655, "learning_rate": 2.4131403417725156e-06, "loss": 0.7861, "step": 10669 }, { "epoch": 0.68, "grad_norm": 1.594047523432406, "learning_rate": 2.412253353893853e-06, "loss": 0.6809, "step": 10670 }, { "epoch": 0.68, "grad_norm": 1.3600760326245354, "learning_rate": 2.4113664772311076e-06, "loss": 0.6307, "step": 10671 }, { "epoch": 0.68, "grad_norm": 1.592524256236493, "learning_rate": 2.410479711822388e-06, "loss": 0.6838, "step": 10672 }, { "epoch": 0.68, "grad_norm": 1.4622033313810945, "learning_rate": 2.40959305770581e-06, "loss": 0.6499, "step": 10673 }, { "epoch": 0.68, "grad_norm": 1.6306006545590161, "learning_rate": 2.40870651491948e-06, "loss": 0.7405, "step": 10674 }, { "epoch": 0.68, "grad_norm": 1.5631271721674869, "learning_rate": 2.4078200835014983e-06, "loss": 0.6732, "step": 10675 }, { "epoch": 0.68, "grad_norm": 1.5952801066902615, "learning_rate": 2.4069337634899624e-06, "loss": 0.6568, "step": 10676 }, { "epoch": 0.68, "grad_norm": 1.537170340558671, "learning_rate": 2.4060475549229634e-06, "loss": 0.6413, "step": 10677 }, { "epoch": 0.68, "grad_norm": 1.4432172048858074, "learning_rate": 2.4051614578385937e-06, "loss": 0.6537, "step": 10678 }, { "epoch": 0.68, "grad_norm": 2.073043408913459, "learning_rate": 2.4042754722749274e-06, "loss": 0.6872, "step": 10679 }, { "epoch": 0.68, "grad_norm": 1.6360690284804291, "learning_rate": 2.403389598270051e-06, "loss": 0.6774, "step": 10680 }, { "epoch": 0.68, "grad_norm": 1.442181786362903, "learning_rate": 2.40250383586203e-06, "loss": 0.566, "step": 10681 }, { "epoch": 0.68, "grad_norm": 1.4831558785657006, "learning_rate": 2.4016181850889383e-06, "loss": 0.5946, "step": 10682 }, { "epoch": 0.68, "grad_norm": 1.5147861629450061, "learning_rate": 2.400732645988836e-06, "loss": 0.6884, "step": 10683 }, { "epoch": 0.68, "grad_norm": 1.576613948516582, "learning_rate": 2.3998472185997813e-06, "loss": 0.6088, "step": 10684 }, { "epoch": 0.68, "grad_norm": 1.6552852200930248, "learning_rate": 2.398961902959833e-06, "loss": 0.7102, "step": 10685 }, { "epoch": 0.68, "grad_norm": 1.5172148686987972, "learning_rate": 2.398076699107034e-06, "loss": 0.708, "step": 10686 }, { "epoch": 0.68, "grad_norm": 1.646717601599578, "learning_rate": 2.3971916070794337e-06, "loss": 0.6893, "step": 10687 }, { "epoch": 0.68, "grad_norm": 1.5430174005991049, "learning_rate": 2.3963066269150653e-06, "loss": 0.7117, "step": 10688 }, { "epoch": 0.68, "grad_norm": 1.3035328680628033, "learning_rate": 2.3954217586519692e-06, "loss": 0.6155, "step": 10689 }, { "epoch": 0.68, "grad_norm": 1.0995512828192404, "learning_rate": 2.394537002328172e-06, "loss": 0.5986, "step": 10690 }, { "epoch": 0.68, "grad_norm": 1.8912510131003213, "learning_rate": 2.3936523579817005e-06, "loss": 0.6198, "step": 10691 }, { "epoch": 0.68, "grad_norm": 1.5475995142711425, "learning_rate": 2.3927678256505737e-06, "loss": 0.6516, "step": 10692 }, { "epoch": 0.68, "grad_norm": 1.5929538513952213, "learning_rate": 2.391883405372805e-06, "loss": 0.628, "step": 10693 }, { "epoch": 0.68, "grad_norm": 1.5643136473171733, "learning_rate": 2.3909990971864117e-06, "loss": 0.7433, "step": 10694 }, { "epoch": 0.68, "grad_norm": 1.1273444360600713, "learning_rate": 2.3901149011293906e-06, "loss": 0.6238, "step": 10695 }, { "epoch": 0.68, "grad_norm": 1.9062773888099185, "learning_rate": 2.3892308172397487e-06, "loss": 0.636, "step": 10696 }, { "epoch": 0.68, "grad_norm": 1.5524051056689014, "learning_rate": 2.388346845555481e-06, "loss": 0.7168, "step": 10697 }, { "epoch": 0.68, "grad_norm": 1.8393029865088117, "learning_rate": 2.3874629861145775e-06, "loss": 0.7456, "step": 10698 }, { "epoch": 0.68, "grad_norm": 1.4987569996807062, "learning_rate": 2.386579238955025e-06, "loss": 0.5612, "step": 10699 }, { "epoch": 0.68, "grad_norm": 1.482948906913354, "learning_rate": 2.3856956041148056e-06, "loss": 0.6169, "step": 10700 }, { "epoch": 0.68, "grad_norm": 1.8967273858471614, "learning_rate": 2.384812081631895e-06, "loss": 0.606, "step": 10701 }, { "epoch": 0.68, "grad_norm": 1.3947257735354077, "learning_rate": 2.383928671544265e-06, "loss": 0.6802, "step": 10702 }, { "epoch": 0.69, "grad_norm": 1.3896863344769077, "learning_rate": 2.3830453738898852e-06, "loss": 0.6473, "step": 10703 }, { "epoch": 0.69, "grad_norm": 1.660672231913052, "learning_rate": 2.382162188706716e-06, "loss": 0.6842, "step": 10704 }, { "epoch": 0.69, "grad_norm": 2.087073691939343, "learning_rate": 2.3812791160327154e-06, "loss": 0.594, "step": 10705 }, { "epoch": 0.69, "grad_norm": 1.4875642987161104, "learning_rate": 2.3803961559058355e-06, "loss": 0.659, "step": 10706 }, { "epoch": 0.69, "grad_norm": 2.065365997650555, "learning_rate": 2.379513308364024e-06, "loss": 0.6814, "step": 10707 }, { "epoch": 0.69, "grad_norm": 3.1446461359623146, "learning_rate": 2.378630573445225e-06, "loss": 0.6058, "step": 10708 }, { "epoch": 0.69, "grad_norm": 1.515985195922938, "learning_rate": 2.377747951187375e-06, "loss": 0.6289, "step": 10709 }, { "epoch": 0.69, "grad_norm": 1.5577488359587046, "learning_rate": 2.376865441628406e-06, "loss": 0.63, "step": 10710 }, { "epoch": 0.69, "grad_norm": 1.7084245979933583, "learning_rate": 2.3759830448062497e-06, "loss": 0.7453, "step": 10711 }, { "epoch": 0.69, "grad_norm": 2.366460395317822, "learning_rate": 2.3751007607588294e-06, "loss": 0.6294, "step": 10712 }, { "epoch": 0.69, "grad_norm": 1.1771730185488578, "learning_rate": 2.3742185895240623e-06, "loss": 0.593, "step": 10713 }, { "epoch": 0.69, "grad_norm": 1.673169154518427, "learning_rate": 2.3733365311398628e-06, "loss": 0.6318, "step": 10714 }, { "epoch": 0.69, "grad_norm": 1.6092740701532893, "learning_rate": 2.3724545856441394e-06, "loss": 0.6232, "step": 10715 }, { "epoch": 0.69, "grad_norm": 1.73789331012495, "learning_rate": 2.3715727530747973e-06, "loss": 0.6908, "step": 10716 }, { "epoch": 0.69, "grad_norm": 1.6727560029176651, "learning_rate": 2.370691033469735e-06, "loss": 0.661, "step": 10717 }, { "epoch": 0.69, "grad_norm": 1.678730386529497, "learning_rate": 2.3698094268668466e-06, "loss": 0.5865, "step": 10718 }, { "epoch": 0.69, "grad_norm": 1.548206727911982, "learning_rate": 2.368927933304021e-06, "loss": 0.6193, "step": 10719 }, { "epoch": 0.69, "grad_norm": 1.5100956541196289, "learning_rate": 2.368046552819146e-06, "loss": 0.647, "step": 10720 }, { "epoch": 0.69, "grad_norm": 1.5168934738721773, "learning_rate": 2.3671652854500995e-06, "loss": 0.6647, "step": 10721 }, { "epoch": 0.69, "grad_norm": 1.6731802659377941, "learning_rate": 2.3662841312347563e-06, "loss": 0.6858, "step": 10722 }, { "epoch": 0.69, "grad_norm": 1.6303861493710474, "learning_rate": 2.3654030902109877e-06, "loss": 0.5939, "step": 10723 }, { "epoch": 0.69, "grad_norm": 1.5013265798546838, "learning_rate": 2.3645221624166577e-06, "loss": 0.6131, "step": 10724 }, { "epoch": 0.69, "grad_norm": 1.5717197155729965, "learning_rate": 2.3636413478896276e-06, "loss": 0.6079, "step": 10725 }, { "epoch": 0.69, "grad_norm": 1.6204578996226955, "learning_rate": 2.3627606466677506e-06, "loss": 0.7132, "step": 10726 }, { "epoch": 0.69, "grad_norm": 1.120909559421487, "learning_rate": 2.361880058788883e-06, "loss": 0.694, "step": 10727 }, { "epoch": 0.69, "grad_norm": 1.6901169316989255, "learning_rate": 2.3609995842908636e-06, "loss": 0.6866, "step": 10728 }, { "epoch": 0.69, "grad_norm": 1.4629461267421928, "learning_rate": 2.3601192232115376e-06, "loss": 0.6308, "step": 10729 }, { "epoch": 0.69, "grad_norm": 1.5914806254087956, "learning_rate": 2.359238975588741e-06, "loss": 0.6235, "step": 10730 }, { "epoch": 0.69, "grad_norm": 1.7645768099539532, "learning_rate": 2.3583588414603046e-06, "loss": 0.6585, "step": 10731 }, { "epoch": 0.69, "grad_norm": 1.6994704932368425, "learning_rate": 2.357478820864054e-06, "loss": 0.7709, "step": 10732 }, { "epoch": 0.69, "grad_norm": 1.5751721815933362, "learning_rate": 2.3565989138378094e-06, "loss": 0.6708, "step": 10733 }, { "epoch": 0.69, "grad_norm": 1.51953431791409, "learning_rate": 2.3557191204193923e-06, "loss": 0.6994, "step": 10734 }, { "epoch": 0.69, "grad_norm": 1.2170495310647038, "learning_rate": 2.3548394406466074e-06, "loss": 0.7153, "step": 10735 }, { "epoch": 0.69, "grad_norm": 1.5523101537157993, "learning_rate": 2.353959874557269e-06, "loss": 0.7217, "step": 10736 }, { "epoch": 0.69, "grad_norm": 1.7205020160652411, "learning_rate": 2.3530804221891707e-06, "loss": 0.7982, "step": 10737 }, { "epoch": 0.69, "grad_norm": 1.5243179419058248, "learning_rate": 2.3522010835801163e-06, "loss": 0.711, "step": 10738 }, { "epoch": 0.69, "grad_norm": 1.2467265536139052, "learning_rate": 2.3513218587678952e-06, "loss": 0.6726, "step": 10739 }, { "epoch": 0.69, "grad_norm": 1.4982670189364193, "learning_rate": 2.3504427477902937e-06, "loss": 0.6356, "step": 10740 }, { "epoch": 0.69, "grad_norm": 1.8178264467376932, "learning_rate": 2.349563750685099e-06, "loss": 0.7083, "step": 10741 }, { "epoch": 0.69, "grad_norm": 1.5285482838719893, "learning_rate": 2.348684867490081e-06, "loss": 0.7133, "step": 10742 }, { "epoch": 0.69, "grad_norm": 1.6217773777227116, "learning_rate": 2.3478060982430193e-06, "loss": 0.6755, "step": 10743 }, { "epoch": 0.69, "grad_norm": 1.4199050285342447, "learning_rate": 2.3469274429816758e-06, "loss": 0.6644, "step": 10744 }, { "epoch": 0.69, "grad_norm": 2.3821686154379464, "learning_rate": 2.3460489017438176e-06, "loss": 0.6449, "step": 10745 }, { "epoch": 0.69, "grad_norm": 1.3622372708661725, "learning_rate": 2.3451704745672006e-06, "loss": 0.732, "step": 10746 }, { "epoch": 0.69, "grad_norm": 1.5988960131332977, "learning_rate": 2.3442921614895783e-06, "loss": 0.6995, "step": 10747 }, { "epoch": 0.69, "grad_norm": 1.5575156862308608, "learning_rate": 2.3434139625486985e-06, "loss": 0.5287, "step": 10748 }, { "epoch": 0.69, "grad_norm": 1.5532111634023564, "learning_rate": 2.342535877782302e-06, "loss": 0.644, "step": 10749 }, { "epoch": 0.69, "grad_norm": 1.665942640034258, "learning_rate": 2.3416579072281333e-06, "loss": 0.6009, "step": 10750 }, { "epoch": 0.69, "grad_norm": 1.1726490315581926, "learning_rate": 2.340780050923918e-06, "loss": 0.5918, "step": 10751 }, { "epoch": 0.69, "grad_norm": 1.3611369850800201, "learning_rate": 2.339902308907391e-06, "loss": 0.6639, "step": 10752 }, { "epoch": 0.69, "grad_norm": 1.7889318855532275, "learning_rate": 2.339024681216272e-06, "loss": 0.7077, "step": 10753 }, { "epoch": 0.69, "grad_norm": 1.5813196909568585, "learning_rate": 2.338147167888281e-06, "loss": 0.6715, "step": 10754 }, { "epoch": 0.69, "grad_norm": 1.8964442698765256, "learning_rate": 2.3372697689611317e-06, "loss": 0.6344, "step": 10755 }, { "epoch": 0.69, "grad_norm": 1.4261946411940307, "learning_rate": 2.3363924844725326e-06, "loss": 0.6036, "step": 10756 }, { "epoch": 0.69, "grad_norm": 1.4404734431325765, "learning_rate": 2.3355153144601873e-06, "loss": 0.6524, "step": 10757 }, { "epoch": 0.69, "grad_norm": 1.483007121862165, "learning_rate": 2.3346382589617933e-06, "loss": 0.6301, "step": 10758 }, { "epoch": 0.69, "grad_norm": 1.8628014184526558, "learning_rate": 2.3337613180150497e-06, "loss": 0.6632, "step": 10759 }, { "epoch": 0.69, "grad_norm": 3.0410237907199056, "learning_rate": 2.332884491657639e-06, "loss": 0.6205, "step": 10760 }, { "epoch": 0.69, "grad_norm": 1.4276643763053152, "learning_rate": 2.3320077799272493e-06, "loss": 0.6933, "step": 10761 }, { "epoch": 0.69, "grad_norm": 1.3565349098790713, "learning_rate": 2.3311311828615594e-06, "loss": 0.6348, "step": 10762 }, { "epoch": 0.69, "grad_norm": 1.53543660588512, "learning_rate": 2.3302547004982434e-06, "loss": 0.5872, "step": 10763 }, { "epoch": 0.69, "grad_norm": 1.6196229033492526, "learning_rate": 2.3293783328749705e-06, "loss": 0.7151, "step": 10764 }, { "epoch": 0.69, "grad_norm": 1.710417770525947, "learning_rate": 2.328502080029405e-06, "loss": 0.7978, "step": 10765 }, { "epoch": 0.69, "grad_norm": 1.6161356708256434, "learning_rate": 2.3276259419992066e-06, "loss": 0.6508, "step": 10766 }, { "epoch": 0.69, "grad_norm": 1.5413688968084096, "learning_rate": 2.326749918822028e-06, "loss": 0.6452, "step": 10767 }, { "epoch": 0.69, "grad_norm": 1.5932681652760359, "learning_rate": 2.325874010535522e-06, "loss": 0.7015, "step": 10768 }, { "epoch": 0.69, "grad_norm": 1.4666146110019562, "learning_rate": 2.3249982171773322e-06, "loss": 0.5873, "step": 10769 }, { "epoch": 0.69, "grad_norm": 1.5567893767585865, "learning_rate": 2.324122538785098e-06, "loss": 0.676, "step": 10770 }, { "epoch": 0.69, "grad_norm": 1.1557898956769468, "learning_rate": 2.323246975396454e-06, "loss": 0.5643, "step": 10771 }, { "epoch": 0.69, "grad_norm": 1.4743254995441686, "learning_rate": 2.322371527049031e-06, "loss": 0.6535, "step": 10772 }, { "epoch": 0.69, "grad_norm": 1.5448320294384876, "learning_rate": 2.321496193780453e-06, "loss": 0.6663, "step": 10773 }, { "epoch": 0.69, "grad_norm": 1.1832899106382733, "learning_rate": 2.3206209756283403e-06, "loss": 0.5585, "step": 10774 }, { "epoch": 0.69, "grad_norm": 1.6017964631348305, "learning_rate": 2.3197458726303064e-06, "loss": 0.7051, "step": 10775 }, { "epoch": 0.69, "grad_norm": 1.186500005239614, "learning_rate": 2.3188708848239638e-06, "loss": 0.5991, "step": 10776 }, { "epoch": 0.69, "grad_norm": 1.663834021768817, "learning_rate": 2.3179960122469173e-06, "loss": 0.8024, "step": 10777 }, { "epoch": 0.69, "grad_norm": 1.7680128055464457, "learning_rate": 2.317121254936767e-06, "loss": 0.7629, "step": 10778 }, { "epoch": 0.69, "grad_norm": 1.5626289251181946, "learning_rate": 2.3162466129311074e-06, "loss": 0.6044, "step": 10779 }, { "epoch": 0.69, "grad_norm": 2.1031132303128546, "learning_rate": 2.3153720862675286e-06, "loss": 0.7402, "step": 10780 }, { "epoch": 0.69, "grad_norm": 2.250051543827379, "learning_rate": 2.314497674983617e-06, "loss": 0.6386, "step": 10781 }, { "epoch": 0.69, "grad_norm": 1.5515852337492846, "learning_rate": 2.31362337911695e-06, "loss": 0.5594, "step": 10782 }, { "epoch": 0.69, "grad_norm": 1.932030938218573, "learning_rate": 2.3127491987051086e-06, "loss": 0.665, "step": 10783 }, { "epoch": 0.69, "grad_norm": 1.637524096455149, "learning_rate": 2.311875133785657e-06, "loss": 0.6631, "step": 10784 }, { "epoch": 0.69, "grad_norm": 1.7468092857754376, "learning_rate": 2.311001184396165e-06, "loss": 0.7304, "step": 10785 }, { "epoch": 0.69, "grad_norm": 1.4397346961856012, "learning_rate": 2.3101273505741922e-06, "loss": 0.5922, "step": 10786 }, { "epoch": 0.69, "grad_norm": 1.441239311944375, "learning_rate": 2.3092536323572933e-06, "loss": 0.6525, "step": 10787 }, { "epoch": 0.69, "grad_norm": 1.6445073822722762, "learning_rate": 2.3083800297830194e-06, "loss": 0.6341, "step": 10788 }, { "epoch": 0.69, "grad_norm": 1.3914203054415395, "learning_rate": 2.307506542888916e-06, "loss": 0.4889, "step": 10789 }, { "epoch": 0.69, "grad_norm": 1.4285108238980146, "learning_rate": 2.3066331717125235e-06, "loss": 0.7254, "step": 10790 }, { "epoch": 0.69, "grad_norm": 1.844885399283837, "learning_rate": 2.305759916291376e-06, "loss": 0.6799, "step": 10791 }, { "epoch": 0.69, "grad_norm": 1.6070629951383115, "learning_rate": 2.3048867766630096e-06, "loss": 0.6596, "step": 10792 }, { "epoch": 0.69, "grad_norm": 1.5673350493448943, "learning_rate": 2.3040137528649424e-06, "loss": 0.7014, "step": 10793 }, { "epoch": 0.69, "grad_norm": 1.5773783891682673, "learning_rate": 2.3031408449347017e-06, "loss": 0.7084, "step": 10794 }, { "epoch": 0.69, "grad_norm": 1.0498907729420608, "learning_rate": 2.3022680529097995e-06, "loss": 0.6244, "step": 10795 }, { "epoch": 0.69, "grad_norm": 1.5483797510889428, "learning_rate": 2.301395376827748e-06, "loss": 0.614, "step": 10796 }, { "epoch": 0.69, "grad_norm": 1.5371817354562956, "learning_rate": 2.300522816726053e-06, "loss": 0.6559, "step": 10797 }, { "epoch": 0.69, "grad_norm": 1.6117673720871788, "learning_rate": 2.2996503726422133e-06, "loss": 0.6485, "step": 10798 }, { "epoch": 0.69, "grad_norm": 1.2778407859869492, "learning_rate": 2.298778044613729e-06, "loss": 0.6242, "step": 10799 }, { "epoch": 0.69, "grad_norm": 1.3612809469169822, "learning_rate": 2.2979058326780855e-06, "loss": 0.7028, "step": 10800 }, { "epoch": 0.69, "grad_norm": 1.5852590196867304, "learning_rate": 2.2970337368727753e-06, "loss": 0.5521, "step": 10801 }, { "epoch": 0.69, "grad_norm": 1.8577717567908667, "learning_rate": 2.2961617572352712e-06, "loss": 0.6664, "step": 10802 }, { "epoch": 0.69, "grad_norm": 1.5005838721745466, "learning_rate": 2.2952898938030554e-06, "loss": 0.6192, "step": 10803 }, { "epoch": 0.69, "grad_norm": 1.7087935322183738, "learning_rate": 2.294418146613596e-06, "loss": 0.6254, "step": 10804 }, { "epoch": 0.69, "grad_norm": 1.6585861164457396, "learning_rate": 2.293546515704358e-06, "loss": 0.696, "step": 10805 }, { "epoch": 0.69, "grad_norm": 1.1325940217895059, "learning_rate": 2.2926750011128074e-06, "loss": 0.6676, "step": 10806 }, { "epoch": 0.69, "grad_norm": 1.5938576789563854, "learning_rate": 2.2918036028763928e-06, "loss": 0.6164, "step": 10807 }, { "epoch": 0.69, "grad_norm": 1.5525832733119207, "learning_rate": 2.2909323210325724e-06, "loss": 0.7315, "step": 10808 }, { "epoch": 0.69, "grad_norm": 1.6309139816222018, "learning_rate": 2.290061155618784e-06, "loss": 0.7311, "step": 10809 }, { "epoch": 0.69, "grad_norm": 1.1510740858362976, "learning_rate": 2.2891901066724755e-06, "loss": 0.6485, "step": 10810 }, { "epoch": 0.69, "grad_norm": 1.0473422090189162, "learning_rate": 2.2883191742310795e-06, "loss": 0.639, "step": 10811 }, { "epoch": 0.69, "grad_norm": 1.5452741206656928, "learning_rate": 2.2874483583320274e-06, "loss": 0.6618, "step": 10812 }, { "epoch": 0.69, "grad_norm": 1.578929197284171, "learning_rate": 2.2865776590127447e-06, "loss": 0.6371, "step": 10813 }, { "epoch": 0.69, "grad_norm": 1.4712855734698758, "learning_rate": 2.285707076310651e-06, "loss": 0.6499, "step": 10814 }, { "epoch": 0.69, "grad_norm": 1.4597837656235075, "learning_rate": 2.2848366102631674e-06, "loss": 0.6802, "step": 10815 }, { "epoch": 0.69, "grad_norm": 2.324462047166262, "learning_rate": 2.2839662609076975e-06, "loss": 0.6751, "step": 10816 }, { "epoch": 0.69, "grad_norm": 1.6270269626615972, "learning_rate": 2.283096028281652e-06, "loss": 0.6818, "step": 10817 }, { "epoch": 0.69, "grad_norm": 1.586852739710634, "learning_rate": 2.28222591242243e-06, "loss": 0.7027, "step": 10818 }, { "epoch": 0.69, "grad_norm": 1.5057004016171311, "learning_rate": 2.281355913367428e-06, "loss": 0.667, "step": 10819 }, { "epoch": 0.69, "grad_norm": 1.7484646146469773, "learning_rate": 2.2804860311540358e-06, "loss": 0.617, "step": 10820 }, { "epoch": 0.69, "grad_norm": 1.8519933158550754, "learning_rate": 2.2796162658196397e-06, "loss": 0.7251, "step": 10821 }, { "epoch": 0.69, "grad_norm": 1.7741174033699143, "learning_rate": 2.2787466174016197e-06, "loss": 0.6971, "step": 10822 }, { "epoch": 0.69, "grad_norm": 1.4504244624699045, "learning_rate": 2.2778770859373504e-06, "loss": 0.6003, "step": 10823 }, { "epoch": 0.69, "grad_norm": 1.1099847160214755, "learning_rate": 2.2770076714642066e-06, "loss": 0.6096, "step": 10824 }, { "epoch": 0.69, "grad_norm": 1.5362301358273835, "learning_rate": 2.27613837401955e-06, "loss": 0.6772, "step": 10825 }, { "epoch": 0.69, "grad_norm": 1.5895662694555412, "learning_rate": 2.2752691936407436e-06, "loss": 0.6873, "step": 10826 }, { "epoch": 0.69, "grad_norm": 1.6026633459430095, "learning_rate": 2.2744001303651407e-06, "loss": 0.6066, "step": 10827 }, { "epoch": 0.69, "grad_norm": 1.6516580833790973, "learning_rate": 2.273531184230094e-06, "loss": 0.7574, "step": 10828 }, { "epoch": 0.69, "grad_norm": 2.1459160268116806, "learning_rate": 2.2726623552729473e-06, "loss": 0.6142, "step": 10829 }, { "epoch": 0.69, "grad_norm": 1.4800480703023609, "learning_rate": 2.2717936435310417e-06, "loss": 0.6171, "step": 10830 }, { "epoch": 0.69, "grad_norm": 1.4953098224689074, "learning_rate": 2.2709250490417124e-06, "loss": 0.5976, "step": 10831 }, { "epoch": 0.69, "grad_norm": 1.553105010668183, "learning_rate": 2.2700565718422885e-06, "loss": 0.6858, "step": 10832 }, { "epoch": 0.69, "grad_norm": 1.5050283170067378, "learning_rate": 2.2691882119700983e-06, "loss": 0.7362, "step": 10833 }, { "epoch": 0.69, "grad_norm": 1.4349415542354875, "learning_rate": 2.2683199694624604e-06, "loss": 0.6389, "step": 10834 }, { "epoch": 0.69, "grad_norm": 1.484991932870503, "learning_rate": 2.2674518443566908e-06, "loss": 0.6837, "step": 10835 }, { "epoch": 0.69, "grad_norm": 1.5809882022297037, "learning_rate": 2.2665838366900983e-06, "loss": 0.6537, "step": 10836 }, { "epoch": 0.69, "grad_norm": 1.4816679655716236, "learning_rate": 2.265715946499989e-06, "loss": 0.6384, "step": 10837 }, { "epoch": 0.69, "grad_norm": 1.3437357831143222, "learning_rate": 2.264848173823663e-06, "loss": 0.6212, "step": 10838 }, { "epoch": 0.69, "grad_norm": 1.3756041096119256, "learning_rate": 2.2639805186984148e-06, "loss": 0.5956, "step": 10839 }, { "epoch": 0.69, "grad_norm": 1.5116927018297182, "learning_rate": 2.2631129811615334e-06, "loss": 0.5991, "step": 10840 }, { "epoch": 0.69, "grad_norm": 1.7184404041611632, "learning_rate": 2.2622455612503064e-06, "loss": 0.6845, "step": 10841 }, { "epoch": 0.69, "grad_norm": 1.319796401885273, "learning_rate": 2.2613782590020126e-06, "loss": 0.6213, "step": 10842 }, { "epoch": 0.69, "grad_norm": 1.5459036042313834, "learning_rate": 2.2605110744539266e-06, "loss": 0.6337, "step": 10843 }, { "epoch": 0.69, "grad_norm": 1.4698652395400298, "learning_rate": 2.2596440076433177e-06, "loss": 0.5621, "step": 10844 }, { "epoch": 0.69, "grad_norm": 1.5095639867588222, "learning_rate": 2.258777058607452e-06, "loss": 0.6828, "step": 10845 }, { "epoch": 0.69, "grad_norm": 1.6130199730633048, "learning_rate": 2.2579102273835877e-06, "loss": 0.6944, "step": 10846 }, { "epoch": 0.69, "grad_norm": 1.3581790093673043, "learning_rate": 2.257043514008978e-06, "loss": 0.6342, "step": 10847 }, { "epoch": 0.69, "grad_norm": 1.619520812415391, "learning_rate": 2.2561769185208783e-06, "loss": 0.695, "step": 10848 }, { "epoch": 0.69, "grad_norm": 1.4382246719567173, "learning_rate": 2.255310440956525e-06, "loss": 0.6898, "step": 10849 }, { "epoch": 0.69, "grad_norm": 2.283034048289791, "learning_rate": 2.2544440813531637e-06, "loss": 0.6938, "step": 10850 }, { "epoch": 0.69, "grad_norm": 1.6429996783900866, "learning_rate": 2.2535778397480263e-06, "loss": 0.6182, "step": 10851 }, { "epoch": 0.69, "grad_norm": 1.7891524308634847, "learning_rate": 2.2527117161783426e-06, "loss": 0.7392, "step": 10852 }, { "epoch": 0.69, "grad_norm": 1.5485364969044688, "learning_rate": 2.251845710681336e-06, "loss": 0.7841, "step": 10853 }, { "epoch": 0.69, "grad_norm": 1.6160878258036608, "learning_rate": 2.250979823294225e-06, "loss": 0.7469, "step": 10854 }, { "epoch": 0.69, "grad_norm": 1.668490644374097, "learning_rate": 2.250114054054228e-06, "loss": 0.6967, "step": 10855 }, { "epoch": 0.69, "grad_norm": 1.7620499386635256, "learning_rate": 2.249248402998547e-06, "loss": 0.7535, "step": 10856 }, { "epoch": 0.69, "grad_norm": 1.6608368909884041, "learning_rate": 2.2483828701643933e-06, "loss": 0.6883, "step": 10857 }, { "epoch": 0.69, "grad_norm": 1.9107499701003867, "learning_rate": 2.2475174555889577e-06, "loss": 0.6959, "step": 10858 }, { "epoch": 0.7, "grad_norm": 1.6927657487023948, "learning_rate": 2.2466521593094404e-06, "loss": 0.6918, "step": 10859 }, { "epoch": 0.7, "grad_norm": 1.700440445809568, "learning_rate": 2.245786981363028e-06, "loss": 0.695, "step": 10860 }, { "epoch": 0.7, "grad_norm": 1.6448173829342496, "learning_rate": 2.2449219217869013e-06, "loss": 0.6316, "step": 10861 }, { "epoch": 0.7, "grad_norm": 1.5692072801261123, "learning_rate": 2.244056980618245e-06, "loss": 0.6403, "step": 10862 }, { "epoch": 0.7, "grad_norm": 1.5280547148828312, "learning_rate": 2.243192157894225e-06, "loss": 0.6935, "step": 10863 }, { "epoch": 0.7, "grad_norm": 1.1721287493417278, "learning_rate": 2.2423274536520172e-06, "loss": 0.5958, "step": 10864 }, { "epoch": 0.7, "grad_norm": 1.4427759489207344, "learning_rate": 2.241462867928777e-06, "loss": 0.7011, "step": 10865 }, { "epoch": 0.7, "grad_norm": 1.5552580226002273, "learning_rate": 2.2405984007616676e-06, "loss": 0.6331, "step": 10866 }, { "epoch": 0.7, "grad_norm": 1.465431652632133, "learning_rate": 2.239734052187841e-06, "loss": 0.5727, "step": 10867 }, { "epoch": 0.7, "grad_norm": 1.644345336875929, "learning_rate": 2.238869822244445e-06, "loss": 0.65, "step": 10868 }, { "epoch": 0.7, "grad_norm": 1.8386290673448384, "learning_rate": 2.2380057109686213e-06, "loss": 0.8528, "step": 10869 }, { "epoch": 0.7, "grad_norm": 1.7511618123754624, "learning_rate": 2.2371417183975076e-06, "loss": 0.6474, "step": 10870 }, { "epoch": 0.7, "grad_norm": 1.6297407716264978, "learning_rate": 2.2362778445682403e-06, "loss": 0.6769, "step": 10871 }, { "epoch": 0.7, "grad_norm": 2.0967357423140744, "learning_rate": 2.2354140895179403e-06, "loss": 0.7164, "step": 10872 }, { "epoch": 0.7, "grad_norm": 1.4090505206192214, "learning_rate": 2.234550453283737e-06, "loss": 0.641, "step": 10873 }, { "epoch": 0.7, "grad_norm": 1.5067950112069652, "learning_rate": 2.2336869359027406e-06, "loss": 0.6314, "step": 10874 }, { "epoch": 0.7, "grad_norm": 2.05670506052585, "learning_rate": 2.2328235374120694e-06, "loss": 0.6041, "step": 10875 }, { "epoch": 0.7, "grad_norm": 1.8200386031862335, "learning_rate": 2.231960257848827e-06, "loss": 0.6827, "step": 10876 }, { "epoch": 0.7, "grad_norm": 1.5495102559084544, "learning_rate": 2.231097097250115e-06, "loss": 0.6216, "step": 10877 }, { "epoch": 0.7, "grad_norm": 0.9937474202856467, "learning_rate": 2.230234055653035e-06, "loss": 0.6228, "step": 10878 }, { "epoch": 0.7, "grad_norm": 1.6592727271712104, "learning_rate": 2.229371133094671e-06, "loss": 0.703, "step": 10879 }, { "epoch": 0.7, "grad_norm": 1.571742673958911, "learning_rate": 2.2285083296121185e-06, "loss": 0.6834, "step": 10880 }, { "epoch": 0.7, "grad_norm": 1.1757553527468179, "learning_rate": 2.22764564524245e-06, "loss": 0.694, "step": 10881 }, { "epoch": 0.7, "grad_norm": 1.356464632480191, "learning_rate": 2.226783080022748e-06, "loss": 0.5676, "step": 10882 }, { "epoch": 0.7, "grad_norm": 1.3362151837802658, "learning_rate": 2.225920633990082e-06, "loss": 0.6767, "step": 10883 }, { "epoch": 0.7, "grad_norm": 1.5872166935111296, "learning_rate": 2.225058307181518e-06, "loss": 0.6093, "step": 10884 }, { "epoch": 0.7, "grad_norm": 1.6900572557685696, "learning_rate": 2.2241960996341166e-06, "loss": 0.6927, "step": 10885 }, { "epoch": 0.7, "grad_norm": 1.312361620743324, "learning_rate": 2.2233340113849343e-06, "loss": 0.6765, "step": 10886 }, { "epoch": 0.7, "grad_norm": 1.4966910182962003, "learning_rate": 2.2224720424710222e-06, "loss": 0.6335, "step": 10887 }, { "epoch": 0.7, "grad_norm": 1.4258670145245256, "learning_rate": 2.221610192929423e-06, "loss": 0.6027, "step": 10888 }, { "epoch": 0.7, "grad_norm": 1.5082070236388847, "learning_rate": 2.2207484627971817e-06, "loss": 0.5963, "step": 10889 }, { "epoch": 0.7, "grad_norm": 1.6906500173036363, "learning_rate": 2.219886852111331e-06, "loss": 0.6831, "step": 10890 }, { "epoch": 0.7, "grad_norm": 1.4674864525768496, "learning_rate": 2.2190253609089014e-06, "loss": 0.6194, "step": 10891 }, { "epoch": 0.7, "grad_norm": 1.7335578716676048, "learning_rate": 2.2181639892269183e-06, "loss": 0.6756, "step": 10892 }, { "epoch": 0.7, "grad_norm": 1.5851080743392474, "learning_rate": 2.217302737102402e-06, "loss": 0.618, "step": 10893 }, { "epoch": 0.7, "grad_norm": 1.5994363755728034, "learning_rate": 2.2164416045723662e-06, "loss": 0.6321, "step": 10894 }, { "epoch": 0.7, "grad_norm": 1.6020054936337282, "learning_rate": 2.2155805916738215e-06, "loss": 0.7454, "step": 10895 }, { "epoch": 0.7, "grad_norm": 1.5634452584700518, "learning_rate": 2.21471969844377e-06, "loss": 0.7237, "step": 10896 }, { "epoch": 0.7, "grad_norm": 1.4448885998441832, "learning_rate": 2.2138589249192156e-06, "loss": 0.5759, "step": 10897 }, { "epoch": 0.7, "grad_norm": 1.4055311663139902, "learning_rate": 2.2129982711371495e-06, "loss": 0.6625, "step": 10898 }, { "epoch": 0.7, "grad_norm": 1.706958335191356, "learning_rate": 2.212137737134562e-06, "loss": 0.7093, "step": 10899 }, { "epoch": 0.7, "grad_norm": 1.5784654403933371, "learning_rate": 2.211277322948436e-06, "loss": 0.6531, "step": 10900 }, { "epoch": 0.7, "grad_norm": 1.5449634045790552, "learning_rate": 2.2104170286157506e-06, "loss": 0.6607, "step": 10901 }, { "epoch": 0.7, "grad_norm": 1.3538851073636193, "learning_rate": 2.2095568541734804e-06, "loss": 0.6436, "step": 10902 }, { "epoch": 0.7, "grad_norm": 1.090268739287521, "learning_rate": 2.20869679965859e-06, "loss": 0.6145, "step": 10903 }, { "epoch": 0.7, "grad_norm": 1.6425548017041072, "learning_rate": 2.2078368651080506e-06, "loss": 0.7226, "step": 10904 }, { "epoch": 0.7, "grad_norm": 1.5290869449092717, "learning_rate": 2.206977050558811e-06, "loss": 0.6751, "step": 10905 }, { "epoch": 0.7, "grad_norm": 1.497231318800626, "learning_rate": 2.2061173560478317e-06, "loss": 0.7777, "step": 10906 }, { "epoch": 0.7, "grad_norm": 1.753082896479333, "learning_rate": 2.205257781612057e-06, "loss": 0.5922, "step": 10907 }, { "epoch": 0.7, "grad_norm": 1.0925611097614607, "learning_rate": 2.204398327288431e-06, "loss": 0.567, "step": 10908 }, { "epoch": 0.7, "grad_norm": 1.6279220311669294, "learning_rate": 2.2035389931138896e-06, "loss": 0.7391, "step": 10909 }, { "epoch": 0.7, "grad_norm": 1.4467385160629505, "learning_rate": 2.202679779125366e-06, "loss": 0.7135, "step": 10910 }, { "epoch": 0.7, "grad_norm": 1.5700641071071164, "learning_rate": 2.201820685359788e-06, "loss": 0.6124, "step": 10911 }, { "epoch": 0.7, "grad_norm": 1.4300393396364761, "learning_rate": 2.2009617118540755e-06, "loss": 0.6478, "step": 10912 }, { "epoch": 0.7, "grad_norm": 1.5551699430468857, "learning_rate": 2.20010285864515e-06, "loss": 0.6125, "step": 10913 }, { "epoch": 0.7, "grad_norm": 1.7179037475245957, "learning_rate": 2.199244125769917e-06, "loss": 0.6931, "step": 10914 }, { "epoch": 0.7, "grad_norm": 1.5951883628539705, "learning_rate": 2.198385513265289e-06, "loss": 0.7152, "step": 10915 }, { "epoch": 0.7, "grad_norm": 1.593008544766181, "learning_rate": 2.1975270211681634e-06, "loss": 0.6314, "step": 10916 }, { "epoch": 0.7, "grad_norm": 1.409054692405353, "learning_rate": 2.1966686495154375e-06, "loss": 0.7217, "step": 10917 }, { "epoch": 0.7, "grad_norm": 1.5884420345186936, "learning_rate": 2.1958103983440034e-06, "loss": 0.7162, "step": 10918 }, { "epoch": 0.7, "grad_norm": 1.5208039616828655, "learning_rate": 2.194952267690744e-06, "loss": 0.6995, "step": 10919 }, { "epoch": 0.7, "grad_norm": 1.3973813067266694, "learning_rate": 2.194094257592545e-06, "loss": 0.6687, "step": 10920 }, { "epoch": 0.7, "grad_norm": 1.591044473966039, "learning_rate": 2.193236368086275e-06, "loss": 0.6431, "step": 10921 }, { "epoch": 0.7, "grad_norm": 1.4575981953783836, "learning_rate": 2.1923785992088126e-06, "loss": 0.7094, "step": 10922 }, { "epoch": 0.7, "grad_norm": 1.5998724534733635, "learning_rate": 2.191520950997014e-06, "loss": 0.6705, "step": 10923 }, { "epoch": 0.7, "grad_norm": 1.75257101840268, "learning_rate": 2.1906634234877453e-06, "loss": 0.6873, "step": 10924 }, { "epoch": 0.7, "grad_norm": 1.4589885740531554, "learning_rate": 2.1898060167178604e-06, "loss": 0.6854, "step": 10925 }, { "epoch": 0.7, "grad_norm": 1.4899935813212561, "learning_rate": 2.1889487307242054e-06, "loss": 0.6307, "step": 10926 }, { "epoch": 0.7, "grad_norm": 1.5443968080275707, "learning_rate": 2.18809156554363e-06, "loss": 0.6743, "step": 10927 }, { "epoch": 0.7, "grad_norm": 1.626122862265776, "learning_rate": 2.187234521212968e-06, "loss": 0.7152, "step": 10928 }, { "epoch": 0.7, "grad_norm": 1.568203058338455, "learning_rate": 2.1863775977690588e-06, "loss": 0.6065, "step": 10929 }, { "epoch": 0.7, "grad_norm": 1.9916856097890034, "learning_rate": 2.185520795248725e-06, "loss": 0.6999, "step": 10930 }, { "epoch": 0.7, "grad_norm": 1.4321423624981817, "learning_rate": 2.1846641136887947e-06, "loss": 0.6333, "step": 10931 }, { "epoch": 0.7, "grad_norm": 1.5284646457699478, "learning_rate": 2.1838075531260854e-06, "loss": 0.7111, "step": 10932 }, { "epoch": 0.7, "grad_norm": 1.7267591238012627, "learning_rate": 2.182951113597408e-06, "loss": 0.647, "step": 10933 }, { "epoch": 0.7, "grad_norm": 1.4014436773065524, "learning_rate": 2.182094795139576e-06, "loss": 0.6052, "step": 10934 }, { "epoch": 0.7, "grad_norm": 1.6225533270780605, "learning_rate": 2.1812385977893844e-06, "loss": 0.759, "step": 10935 }, { "epoch": 0.7, "grad_norm": 1.4819397382257529, "learning_rate": 2.1803825215836387e-06, "loss": 0.5935, "step": 10936 }, { "epoch": 0.7, "grad_norm": 1.4802309550810993, "learning_rate": 2.1795265665591236e-06, "loss": 0.6649, "step": 10937 }, { "epoch": 0.7, "grad_norm": 1.4604456940449253, "learning_rate": 2.1786707327526325e-06, "loss": 0.6094, "step": 10938 }, { "epoch": 0.7, "grad_norm": 1.594826987392518, "learning_rate": 2.177815020200944e-06, "loss": 0.7246, "step": 10939 }, { "epoch": 0.7, "grad_norm": 1.7300512236322787, "learning_rate": 2.176959428940836e-06, "loss": 0.6561, "step": 10940 }, { "epoch": 0.7, "grad_norm": 1.4402867924795297, "learning_rate": 2.17610395900908e-06, "loss": 0.6875, "step": 10941 }, { "epoch": 0.7, "grad_norm": 1.5948401058010955, "learning_rate": 2.1752486104424403e-06, "loss": 0.6881, "step": 10942 }, { "epoch": 0.7, "grad_norm": 2.1571923018505808, "learning_rate": 2.174393383277683e-06, "loss": 0.5938, "step": 10943 }, { "epoch": 0.7, "grad_norm": 1.554654879332223, "learning_rate": 2.173538277551557e-06, "loss": 0.6525, "step": 10944 }, { "epoch": 0.7, "grad_norm": 1.5837882786611668, "learning_rate": 2.1726832933008185e-06, "loss": 0.669, "step": 10945 }, { "epoch": 0.7, "grad_norm": 1.5336107106330756, "learning_rate": 2.171828430562211e-06, "loss": 0.6362, "step": 10946 }, { "epoch": 0.7, "grad_norm": 1.5282063294026798, "learning_rate": 2.170973689372475e-06, "loss": 0.7384, "step": 10947 }, { "epoch": 0.7, "grad_norm": 1.4991334055035939, "learning_rate": 2.170119069768345e-06, "loss": 0.7077, "step": 10948 }, { "epoch": 0.7, "grad_norm": 1.7640726820205832, "learning_rate": 2.1692645717865515e-06, "loss": 0.6827, "step": 10949 }, { "epoch": 0.7, "grad_norm": 1.2886542206144982, "learning_rate": 2.1684101954638176e-06, "loss": 0.6049, "step": 10950 }, { "epoch": 0.7, "grad_norm": 1.4929020308717733, "learning_rate": 2.1675559408368623e-06, "loss": 0.6397, "step": 10951 }, { "epoch": 0.7, "grad_norm": 1.7024286915939042, "learning_rate": 2.166701807942404e-06, "loss": 0.6682, "step": 10952 }, { "epoch": 0.7, "grad_norm": 1.5316262329563364, "learning_rate": 2.165847796817145e-06, "loss": 0.6835, "step": 10953 }, { "epoch": 0.7, "grad_norm": 1.5888194996664424, "learning_rate": 2.1649939074977945e-06, "loss": 0.611, "step": 10954 }, { "epoch": 0.7, "grad_norm": 1.6119371665969033, "learning_rate": 2.164140140021049e-06, "loss": 0.7333, "step": 10955 }, { "epoch": 0.7, "grad_norm": 1.368475701591887, "learning_rate": 2.1632864944236004e-06, "loss": 0.6521, "step": 10956 }, { "epoch": 0.7, "grad_norm": 1.609130372586526, "learning_rate": 2.1624329707421374e-06, "loss": 0.8001, "step": 10957 }, { "epoch": 0.7, "grad_norm": 1.79357468226098, "learning_rate": 2.161579569013344e-06, "loss": 0.673, "step": 10958 }, { "epoch": 0.7, "grad_norm": 1.5398966121889557, "learning_rate": 2.1607262892738956e-06, "loss": 0.666, "step": 10959 }, { "epoch": 0.7, "grad_norm": 1.4136479302443328, "learning_rate": 2.1598731315604647e-06, "loss": 0.6842, "step": 10960 }, { "epoch": 0.7, "grad_norm": 1.6524177425014253, "learning_rate": 2.1590200959097173e-06, "loss": 0.7487, "step": 10961 }, { "epoch": 0.7, "grad_norm": 1.3964811730496562, "learning_rate": 2.1581671823583183e-06, "loss": 0.6268, "step": 10962 }, { "epoch": 0.7, "grad_norm": 1.5642813413631182, "learning_rate": 2.157314390942923e-06, "loss": 0.6489, "step": 10963 }, { "epoch": 0.7, "grad_norm": 1.1768501866210366, "learning_rate": 2.156461721700181e-06, "loss": 0.6914, "step": 10964 }, { "epoch": 0.7, "grad_norm": 1.1041963534442818, "learning_rate": 2.1556091746667392e-06, "loss": 0.6469, "step": 10965 }, { "epoch": 0.7, "grad_norm": 1.1615141705380987, "learning_rate": 2.1547567498792382e-06, "loss": 0.5681, "step": 10966 }, { "epoch": 0.7, "grad_norm": 1.5238580454201371, "learning_rate": 2.1539044473743136e-06, "loss": 0.757, "step": 10967 }, { "epoch": 0.7, "grad_norm": 1.4423166054351746, "learning_rate": 2.1530522671885935e-06, "loss": 0.691, "step": 10968 }, { "epoch": 0.7, "grad_norm": 1.5132464632290772, "learning_rate": 2.152200209358708e-06, "loss": 0.6273, "step": 10969 }, { "epoch": 0.7, "grad_norm": 1.5221052118987097, "learning_rate": 2.15134827392127e-06, "loss": 0.5988, "step": 10970 }, { "epoch": 0.7, "grad_norm": 1.0104154052523582, "learning_rate": 2.1504964609128994e-06, "loss": 0.7294, "step": 10971 }, { "epoch": 0.7, "grad_norm": 1.3809342887514406, "learning_rate": 2.1496447703702035e-06, "loss": 0.684, "step": 10972 }, { "epoch": 0.7, "grad_norm": 1.2492137113394726, "learning_rate": 2.1487932023297853e-06, "loss": 0.6191, "step": 10973 }, { "epoch": 0.7, "grad_norm": 1.5222676535063449, "learning_rate": 2.147941756828244e-06, "loss": 0.6809, "step": 10974 }, { "epoch": 0.7, "grad_norm": 2.0578017051487887, "learning_rate": 2.147090433902172e-06, "loss": 0.728, "step": 10975 }, { "epoch": 0.7, "grad_norm": 1.4815962067994641, "learning_rate": 2.146239233588161e-06, "loss": 0.6992, "step": 10976 }, { "epoch": 0.7, "grad_norm": 1.4843593673188038, "learning_rate": 2.1453881559227883e-06, "loss": 0.6767, "step": 10977 }, { "epoch": 0.7, "grad_norm": 1.5395615369741635, "learning_rate": 2.1445372009426374e-06, "loss": 0.6796, "step": 10978 }, { "epoch": 0.7, "grad_norm": 1.4551752511671463, "learning_rate": 2.143686368684274e-06, "loss": 0.7132, "step": 10979 }, { "epoch": 0.7, "grad_norm": 1.6125571283634863, "learning_rate": 2.1428356591842707e-06, "loss": 0.6457, "step": 10980 }, { "epoch": 0.7, "grad_norm": 1.4861502880235702, "learning_rate": 2.141985072479187e-06, "loss": 0.6832, "step": 10981 }, { "epoch": 0.7, "grad_norm": 1.6311151910338204, "learning_rate": 2.141134608605579e-06, "loss": 0.7399, "step": 10982 }, { "epoch": 0.7, "grad_norm": 1.4602762590778513, "learning_rate": 2.1402842675999978e-06, "loss": 0.7619, "step": 10983 }, { "epoch": 0.7, "grad_norm": 1.1025757279640798, "learning_rate": 2.139434049498989e-06, "loss": 0.6604, "step": 10984 }, { "epoch": 0.7, "grad_norm": 1.6484507640985973, "learning_rate": 2.1385839543390967e-06, "loss": 0.671, "step": 10985 }, { "epoch": 0.7, "grad_norm": 1.4859846691903242, "learning_rate": 2.13773398215685e-06, "loss": 0.6342, "step": 10986 }, { "epoch": 0.7, "grad_norm": 1.9078614136680077, "learning_rate": 2.136884132988784e-06, "loss": 0.6893, "step": 10987 }, { "epoch": 0.7, "grad_norm": 1.5687063613082166, "learning_rate": 2.1360344068714216e-06, "loss": 0.6237, "step": 10988 }, { "epoch": 0.7, "grad_norm": 1.6874687637226604, "learning_rate": 2.1351848038412832e-06, "loss": 0.6824, "step": 10989 }, { "epoch": 0.7, "grad_norm": 1.5091590424872217, "learning_rate": 2.1343353239348812e-06, "loss": 0.6593, "step": 10990 }, { "epoch": 0.7, "grad_norm": 1.4344677589337065, "learning_rate": 2.1334859671887236e-06, "loss": 0.5981, "step": 10991 }, { "epoch": 0.7, "grad_norm": 1.613794757605331, "learning_rate": 2.13263673363932e-06, "loss": 0.6446, "step": 10992 }, { "epoch": 0.7, "grad_norm": 1.5078615005513136, "learning_rate": 2.1317876233231606e-06, "loss": 0.7014, "step": 10993 }, { "epoch": 0.7, "grad_norm": 1.449614756941697, "learning_rate": 2.1309386362767453e-06, "loss": 0.6559, "step": 10994 }, { "epoch": 0.7, "grad_norm": 1.0063133524124608, "learning_rate": 2.1300897725365555e-06, "loss": 0.6442, "step": 10995 }, { "epoch": 0.7, "grad_norm": 1.5796067515492258, "learning_rate": 2.1292410321390786e-06, "loss": 0.735, "step": 10996 }, { "epoch": 0.7, "grad_norm": 1.4416923502358985, "learning_rate": 2.12839241512079e-06, "loss": 0.7196, "step": 10997 }, { "epoch": 0.7, "grad_norm": 1.495384868829468, "learning_rate": 2.127543921518159e-06, "loss": 0.692, "step": 10998 }, { "epoch": 0.7, "grad_norm": 1.7856047874474665, "learning_rate": 2.1266955513676584e-06, "loss": 0.5698, "step": 10999 }, { "epoch": 0.7, "grad_norm": 1.7252107828703636, "learning_rate": 2.1258473047057416e-06, "loss": 0.6643, "step": 11000 }, { "epoch": 0.7, "grad_norm": 1.494120793320888, "learning_rate": 2.124999181568872e-06, "loss": 0.5626, "step": 11001 }, { "epoch": 0.7, "grad_norm": 1.5510821950407214, "learning_rate": 2.1241511819934923e-06, "loss": 0.6239, "step": 11002 }, { "epoch": 0.7, "grad_norm": 1.5404135329949133, "learning_rate": 2.1233033060160542e-06, "loss": 0.6147, "step": 11003 }, { "epoch": 0.7, "grad_norm": 1.7264652809261483, "learning_rate": 2.1224555536729952e-06, "loss": 0.6809, "step": 11004 }, { "epoch": 0.7, "grad_norm": 1.7389988735415014, "learning_rate": 2.121607925000749e-06, "loss": 0.6327, "step": 11005 }, { "epoch": 0.7, "grad_norm": 1.6717987021549674, "learning_rate": 2.1207604200357466e-06, "loss": 0.714, "step": 11006 }, { "epoch": 0.7, "grad_norm": 1.766821537156178, "learning_rate": 2.1199130388144098e-06, "loss": 0.7149, "step": 11007 }, { "epoch": 0.7, "grad_norm": 1.6975098004157512, "learning_rate": 2.119065781373162e-06, "loss": 0.6414, "step": 11008 }, { "epoch": 0.7, "grad_norm": 1.5904923729642935, "learning_rate": 2.1182186477484094e-06, "loss": 0.5775, "step": 11009 }, { "epoch": 0.7, "grad_norm": 1.6298781733524106, "learning_rate": 2.1173716379765656e-06, "loss": 0.6639, "step": 11010 }, { "epoch": 0.7, "grad_norm": 1.7082786129673353, "learning_rate": 2.1165247520940317e-06, "loss": 0.6919, "step": 11011 }, { "epoch": 0.7, "grad_norm": 1.567259311007592, "learning_rate": 2.1156779901372053e-06, "loss": 0.6388, "step": 11012 }, { "epoch": 0.7, "grad_norm": 2.000400516122066, "learning_rate": 2.114831352142478e-06, "loss": 0.7091, "step": 11013 }, { "epoch": 0.7, "grad_norm": 1.6818379808972856, "learning_rate": 2.1139848381462363e-06, "loss": 0.7154, "step": 11014 }, { "epoch": 0.71, "grad_norm": 1.433365792783139, "learning_rate": 2.1131384481848614e-06, "loss": 0.5731, "step": 11015 }, { "epoch": 0.71, "grad_norm": 1.4959511921308835, "learning_rate": 2.112292182294729e-06, "loss": 0.6904, "step": 11016 }, { "epoch": 0.71, "grad_norm": 1.0345588837256623, "learning_rate": 2.111446040512212e-06, "loss": 0.5855, "step": 11017 }, { "epoch": 0.71, "grad_norm": 1.4814306780088982, "learning_rate": 2.110600022873675e-06, "loss": 0.6017, "step": 11018 }, { "epoch": 0.71, "grad_norm": 1.8935277399987611, "learning_rate": 2.1097541294154773e-06, "loss": 0.674, "step": 11019 }, { "epoch": 0.71, "grad_norm": 1.640966328518358, "learning_rate": 2.1089083601739735e-06, "loss": 0.6717, "step": 11020 }, { "epoch": 0.71, "grad_norm": 1.721326239375118, "learning_rate": 2.108062715185514e-06, "loss": 0.7342, "step": 11021 }, { "epoch": 0.71, "grad_norm": 1.4486447224289185, "learning_rate": 2.1072171944864415e-06, "loss": 0.5761, "step": 11022 }, { "epoch": 0.71, "grad_norm": 1.3954939611194013, "learning_rate": 2.1063717981130952e-06, "loss": 0.6583, "step": 11023 }, { "epoch": 0.71, "grad_norm": 1.4187151275521073, "learning_rate": 2.10552652610181e-06, "loss": 0.6703, "step": 11024 }, { "epoch": 0.71, "grad_norm": 1.5771495717430928, "learning_rate": 2.10468137848891e-06, "loss": 0.627, "step": 11025 }, { "epoch": 0.71, "grad_norm": 1.3590116882027643, "learning_rate": 2.103836355310722e-06, "loss": 0.6647, "step": 11026 }, { "epoch": 0.71, "grad_norm": 1.6600032614479945, "learning_rate": 2.102991456603562e-06, "loss": 0.6315, "step": 11027 }, { "epoch": 0.71, "grad_norm": 1.550063843102125, "learning_rate": 2.102146682403742e-06, "loss": 0.6048, "step": 11028 }, { "epoch": 0.71, "grad_norm": 1.5579494460380507, "learning_rate": 2.1013020327475683e-06, "loss": 0.6102, "step": 11029 }, { "epoch": 0.71, "grad_norm": 1.8812836509962334, "learning_rate": 2.100457507671341e-06, "loss": 0.68, "step": 11030 }, { "epoch": 0.71, "grad_norm": 1.533606810658593, "learning_rate": 2.099613107211359e-06, "loss": 0.7324, "step": 11031 }, { "epoch": 0.71, "grad_norm": 1.7390883941641815, "learning_rate": 2.09876883140391e-06, "loss": 0.6033, "step": 11032 }, { "epoch": 0.71, "grad_norm": 1.742067539830002, "learning_rate": 2.0979246802852794e-06, "loss": 0.7305, "step": 11033 }, { "epoch": 0.71, "grad_norm": 1.5090548456737585, "learning_rate": 2.0970806538917506e-06, "loss": 0.6979, "step": 11034 }, { "epoch": 0.71, "grad_norm": 1.7477798966445743, "learning_rate": 2.096236752259592e-06, "loss": 0.6987, "step": 11035 }, { "epoch": 0.71, "grad_norm": 1.5189306964163334, "learning_rate": 2.0953929754250783e-06, "loss": 0.6588, "step": 11036 }, { "epoch": 0.71, "grad_norm": 1.4691383952471941, "learning_rate": 2.0945493234244714e-06, "loss": 0.6399, "step": 11037 }, { "epoch": 0.71, "grad_norm": 1.4446131497296548, "learning_rate": 2.0937057962940287e-06, "loss": 0.598, "step": 11038 }, { "epoch": 0.71, "grad_norm": 1.6131668075326584, "learning_rate": 2.0928623940700044e-06, "loss": 0.7085, "step": 11039 }, { "epoch": 0.71, "grad_norm": 1.8998645009668573, "learning_rate": 2.0920191167886435e-06, "loss": 0.7764, "step": 11040 }, { "epoch": 0.71, "grad_norm": 1.518001741154023, "learning_rate": 2.091175964486194e-06, "loss": 0.6404, "step": 11041 }, { "epoch": 0.71, "grad_norm": 1.5601704459505243, "learning_rate": 2.0903329371988864e-06, "loss": 0.6368, "step": 11042 }, { "epoch": 0.71, "grad_norm": 1.7218895453925604, "learning_rate": 2.0894900349629576e-06, "loss": 0.6645, "step": 11043 }, { "epoch": 0.71, "grad_norm": 1.6163250695554499, "learning_rate": 2.0886472578146284e-06, "loss": 0.6332, "step": 11044 }, { "epoch": 0.71, "grad_norm": 1.5609169120383162, "learning_rate": 2.087804605790124e-06, "loss": 0.5962, "step": 11045 }, { "epoch": 0.71, "grad_norm": 1.5625335931281479, "learning_rate": 2.0869620789256583e-06, "loss": 0.6856, "step": 11046 }, { "epoch": 0.71, "grad_norm": 1.6358135673344172, "learning_rate": 2.08611967725744e-06, "loss": 0.595, "step": 11047 }, { "epoch": 0.71, "grad_norm": 2.2116091766418084, "learning_rate": 2.0852774008216782e-06, "loss": 0.6232, "step": 11048 }, { "epoch": 0.71, "grad_norm": 1.1354608828625443, "learning_rate": 2.0844352496545652e-06, "loss": 0.6349, "step": 11049 }, { "epoch": 0.71, "grad_norm": 1.5400855722290638, "learning_rate": 2.0835932237923027e-06, "loss": 0.6546, "step": 11050 }, { "epoch": 0.71, "grad_norm": 1.9288713106663002, "learning_rate": 2.0827513232710716e-06, "loss": 0.6938, "step": 11051 }, { "epoch": 0.71, "grad_norm": 1.7497789927763703, "learning_rate": 2.0819095481270603e-06, "loss": 0.7261, "step": 11052 }, { "epoch": 0.71, "grad_norm": 1.562533000075473, "learning_rate": 2.081067898396445e-06, "loss": 0.6349, "step": 11053 }, { "epoch": 0.71, "grad_norm": 1.5100435432982093, "learning_rate": 2.080226374115396e-06, "loss": 0.6762, "step": 11054 }, { "epoch": 0.71, "grad_norm": 1.524139163580959, "learning_rate": 2.0793849753200855e-06, "loss": 0.6241, "step": 11055 }, { "epoch": 0.71, "grad_norm": 1.5731887565548224, "learning_rate": 2.0785437020466686e-06, "loss": 0.742, "step": 11056 }, { "epoch": 0.71, "grad_norm": 1.4642564302007814, "learning_rate": 2.077702554331308e-06, "loss": 0.6312, "step": 11057 }, { "epoch": 0.71, "grad_norm": 1.5135648841856302, "learning_rate": 2.076861532210148e-06, "loss": 0.6431, "step": 11058 }, { "epoch": 0.71, "grad_norm": 1.719688115925032, "learning_rate": 2.0760206357193373e-06, "loss": 0.651, "step": 11059 }, { "epoch": 0.71, "grad_norm": 1.1746614327424707, "learning_rate": 2.075179864895017e-06, "loss": 0.6164, "step": 11060 }, { "epoch": 0.71, "grad_norm": 1.733633937916602, "learning_rate": 2.0743392197733193e-06, "loss": 0.6552, "step": 11061 }, { "epoch": 0.71, "grad_norm": 1.697336218268035, "learning_rate": 2.0734987003903747e-06, "loss": 0.6367, "step": 11062 }, { "epoch": 0.71, "grad_norm": 1.785877262218665, "learning_rate": 2.0726583067823046e-06, "loss": 0.699, "step": 11063 }, { "epoch": 0.71, "grad_norm": 1.6256053136568729, "learning_rate": 2.0718180389852325e-06, "loss": 0.6648, "step": 11064 }, { "epoch": 0.71, "grad_norm": 1.5379452048025066, "learning_rate": 2.0709778970352657e-06, "loss": 0.6008, "step": 11065 }, { "epoch": 0.71, "grad_norm": 1.0927658889261134, "learning_rate": 2.070137880968517e-06, "loss": 0.6371, "step": 11066 }, { "epoch": 0.71, "grad_norm": 1.5986054495580078, "learning_rate": 2.069297990821082e-06, "loss": 0.6714, "step": 11067 }, { "epoch": 0.71, "grad_norm": 1.6897978295431522, "learning_rate": 2.0684582266290626e-06, "loss": 0.5239, "step": 11068 }, { "epoch": 0.71, "grad_norm": 1.909081171021416, "learning_rate": 2.0676185884285495e-06, "loss": 0.665, "step": 11069 }, { "epoch": 0.71, "grad_norm": 1.4192057123221302, "learning_rate": 2.0667790762556267e-06, "loss": 0.6761, "step": 11070 }, { "epoch": 0.71, "grad_norm": 1.4540875818069154, "learning_rate": 2.0659396901463764e-06, "loss": 0.6426, "step": 11071 }, { "epoch": 0.71, "grad_norm": 1.8883893387005806, "learning_rate": 2.0651004301368712e-06, "loss": 0.6968, "step": 11072 }, { "epoch": 0.71, "grad_norm": 1.5842065937448868, "learning_rate": 2.064261296263185e-06, "loss": 0.6177, "step": 11073 }, { "epoch": 0.71, "grad_norm": 1.5086380987862347, "learning_rate": 2.063422288561377e-06, "loss": 0.6572, "step": 11074 }, { "epoch": 0.71, "grad_norm": 1.5046222798771631, "learning_rate": 2.0625834070675094e-06, "loss": 0.6168, "step": 11075 }, { "epoch": 0.71, "grad_norm": 1.9905406513760795, "learning_rate": 2.0617446518176354e-06, "loss": 0.6889, "step": 11076 }, { "epoch": 0.71, "grad_norm": 1.0391779320849717, "learning_rate": 2.0609060228478017e-06, "loss": 0.5584, "step": 11077 }, { "epoch": 0.71, "grad_norm": 1.5406680982248955, "learning_rate": 2.0600675201940513e-06, "loss": 0.6393, "step": 11078 }, { "epoch": 0.71, "grad_norm": 1.1569148022313207, "learning_rate": 2.0592291438924213e-06, "loss": 0.6779, "step": 11079 }, { "epoch": 0.71, "grad_norm": 1.7956173161746614, "learning_rate": 2.0583908939789426e-06, "loss": 0.697, "step": 11080 }, { "epoch": 0.71, "grad_norm": 1.504381070114847, "learning_rate": 2.0575527704896414e-06, "loss": 0.6981, "step": 11081 }, { "epoch": 0.71, "grad_norm": 1.5284216669529112, "learning_rate": 2.05671477346054e-06, "loss": 0.7718, "step": 11082 }, { "epoch": 0.71, "grad_norm": 1.8060700524462574, "learning_rate": 2.055876902927654e-06, "loss": 0.6614, "step": 11083 }, { "epoch": 0.71, "grad_norm": 1.3669835764841187, "learning_rate": 2.0550391589269913e-06, "loss": 0.6578, "step": 11084 }, { "epoch": 0.71, "grad_norm": 1.5056649739343122, "learning_rate": 2.0542015414945577e-06, "loss": 0.5648, "step": 11085 }, { "epoch": 0.71, "grad_norm": 1.8348148792750822, "learning_rate": 2.0533640506663523e-06, "loss": 0.7045, "step": 11086 }, { "epoch": 0.71, "grad_norm": 1.5887708155786515, "learning_rate": 2.0525266864783676e-06, "loss": 0.7518, "step": 11087 }, { "epoch": 0.71, "grad_norm": 1.739734580088544, "learning_rate": 2.051689448966593e-06, "loss": 0.6201, "step": 11088 }, { "epoch": 0.71, "grad_norm": 1.4447224948595898, "learning_rate": 2.050852338167008e-06, "loss": 0.7481, "step": 11089 }, { "epoch": 0.71, "grad_norm": 1.8369638146171956, "learning_rate": 2.050015354115595e-06, "loss": 0.6842, "step": 11090 }, { "epoch": 0.71, "grad_norm": 2.04668665263231, "learning_rate": 2.049178496848323e-06, "loss": 0.6345, "step": 11091 }, { "epoch": 0.71, "grad_norm": 1.5419354346745004, "learning_rate": 2.048341766401159e-06, "loss": 0.5881, "step": 11092 }, { "epoch": 0.71, "grad_norm": 1.1077078143198775, "learning_rate": 2.0475051628100635e-06, "loss": 0.7245, "step": 11093 }, { "epoch": 0.71, "grad_norm": 1.9143524455097227, "learning_rate": 2.0466686861109913e-06, "loss": 0.6568, "step": 11094 }, { "epoch": 0.71, "grad_norm": 1.6501176614944078, "learning_rate": 2.045832336339894e-06, "loss": 0.6482, "step": 11095 }, { "epoch": 0.71, "grad_norm": 1.38035257767783, "learning_rate": 2.0449961135327135e-06, "loss": 0.6319, "step": 11096 }, { "epoch": 0.71, "grad_norm": 1.7655091559108393, "learning_rate": 2.044160017725394e-06, "loss": 0.6894, "step": 11097 }, { "epoch": 0.71, "grad_norm": 1.4466296275142607, "learning_rate": 2.0433240489538624e-06, "loss": 0.6412, "step": 11098 }, { "epoch": 0.71, "grad_norm": 1.464558786627592, "learning_rate": 2.042488207254054e-06, "loss": 0.6561, "step": 11099 }, { "epoch": 0.71, "grad_norm": 1.6349696550836696, "learning_rate": 2.041652492661884e-06, "loss": 0.6287, "step": 11100 }, { "epoch": 0.71, "grad_norm": 1.5433702442421153, "learning_rate": 2.040816905213276e-06, "loss": 0.6701, "step": 11101 }, { "epoch": 0.71, "grad_norm": 3.600301104671402, "learning_rate": 2.0399814449441385e-06, "loss": 0.6266, "step": 11102 }, { "epoch": 0.71, "grad_norm": 1.3946371604864922, "learning_rate": 2.0391461118903788e-06, "loss": 0.6919, "step": 11103 }, { "epoch": 0.71, "grad_norm": 1.661267470657279, "learning_rate": 2.038310906087898e-06, "loss": 0.6418, "step": 11104 }, { "epoch": 0.71, "grad_norm": 1.5654890440957512, "learning_rate": 2.0374758275725893e-06, "loss": 0.7117, "step": 11105 }, { "epoch": 0.71, "grad_norm": 1.7747169990063199, "learning_rate": 2.0366408763803476e-06, "loss": 0.728, "step": 11106 }, { "epoch": 0.71, "grad_norm": 1.8783280735045047, "learning_rate": 2.0358060525470507e-06, "loss": 0.7511, "step": 11107 }, { "epoch": 0.71, "grad_norm": 1.791734513015383, "learning_rate": 2.034971356108585e-06, "loss": 0.6079, "step": 11108 }, { "epoch": 0.71, "grad_norm": 1.3587985436299388, "learning_rate": 2.0341367871008154e-06, "loss": 0.5839, "step": 11109 }, { "epoch": 0.71, "grad_norm": 1.6658423494554897, "learning_rate": 2.0333023455596173e-06, "loss": 0.669, "step": 11110 }, { "epoch": 0.71, "grad_norm": 1.369690023772235, "learning_rate": 2.0324680315208505e-06, "loss": 0.7155, "step": 11111 }, { "epoch": 0.71, "grad_norm": 1.6685870922962416, "learning_rate": 2.03163384502037e-06, "loss": 0.6227, "step": 11112 }, { "epoch": 0.71, "grad_norm": 1.7126452839583663, "learning_rate": 2.0307997860940333e-06, "loss": 0.5606, "step": 11113 }, { "epoch": 0.71, "grad_norm": 1.4893359611595591, "learning_rate": 2.0299658547776784e-06, "loss": 0.6737, "step": 11114 }, { "epoch": 0.71, "grad_norm": 1.4741731220489624, "learning_rate": 2.0291320511071544e-06, "loss": 0.7293, "step": 11115 }, { "epoch": 0.71, "grad_norm": 1.5330804158937041, "learning_rate": 2.0282983751182884e-06, "loss": 0.6586, "step": 11116 }, { "epoch": 0.71, "grad_norm": 1.7823746061932204, "learning_rate": 2.0274648268469154e-06, "loss": 0.6799, "step": 11117 }, { "epoch": 0.71, "grad_norm": 1.5974879582525336, "learning_rate": 2.026631406328858e-06, "loss": 0.7314, "step": 11118 }, { "epoch": 0.71, "grad_norm": 1.6275677041529697, "learning_rate": 2.025798113599933e-06, "loss": 0.6378, "step": 11119 }, { "epoch": 0.71, "grad_norm": 1.5915119668100242, "learning_rate": 2.0249649486959595e-06, "loss": 0.6501, "step": 11120 }, { "epoch": 0.71, "grad_norm": 1.7082698735059914, "learning_rate": 2.0241319116527376e-06, "loss": 0.6152, "step": 11121 }, { "epoch": 0.71, "grad_norm": 1.585502120061989, "learning_rate": 2.0232990025060757e-06, "loss": 0.6526, "step": 11122 }, { "epoch": 0.71, "grad_norm": 1.5534723446088505, "learning_rate": 2.022466221291765e-06, "loss": 0.7477, "step": 11123 }, { "epoch": 0.71, "grad_norm": 1.7151427666107182, "learning_rate": 2.0216335680456005e-06, "loss": 0.6866, "step": 11124 }, { "epoch": 0.71, "grad_norm": 2.168447098052557, "learning_rate": 2.0208010428033675e-06, "loss": 0.6766, "step": 11125 }, { "epoch": 0.71, "grad_norm": 2.5139701435915716, "learning_rate": 2.0199686456008456e-06, "loss": 0.7119, "step": 11126 }, { "epoch": 0.71, "grad_norm": 1.9475479001332072, "learning_rate": 2.0191363764738087e-06, "loss": 0.8413, "step": 11127 }, { "epoch": 0.71, "grad_norm": 1.5494497305952828, "learning_rate": 2.018304235458025e-06, "loss": 0.6468, "step": 11128 }, { "epoch": 0.71, "grad_norm": 1.6377762039792905, "learning_rate": 2.017472222589264e-06, "loss": 0.6989, "step": 11129 }, { "epoch": 0.71, "grad_norm": 1.5013698919804361, "learning_rate": 2.0166403379032755e-06, "loss": 0.7742, "step": 11130 }, { "epoch": 0.71, "grad_norm": 1.3459336196452991, "learning_rate": 2.0158085814358187e-06, "loss": 0.6156, "step": 11131 }, { "epoch": 0.71, "grad_norm": 1.681530305419964, "learning_rate": 2.0149769532226383e-06, "loss": 0.6637, "step": 11132 }, { "epoch": 0.71, "grad_norm": 1.1672816687539977, "learning_rate": 2.014145453299476e-06, "loss": 0.6507, "step": 11133 }, { "epoch": 0.71, "grad_norm": 1.0873457959897952, "learning_rate": 2.013314081702068e-06, "loss": 0.6512, "step": 11134 }, { "epoch": 0.71, "grad_norm": 1.1703149411832898, "learning_rate": 2.012482838466145e-06, "loss": 0.6415, "step": 11135 }, { "epoch": 0.71, "grad_norm": 1.6923220098285707, "learning_rate": 2.0116517236274312e-06, "loss": 0.7426, "step": 11136 }, { "epoch": 0.71, "grad_norm": 1.5972891017545652, "learning_rate": 2.010820737221646e-06, "loss": 0.6733, "step": 11137 }, { "epoch": 0.71, "grad_norm": 1.620238421717832, "learning_rate": 2.0099898792845057e-06, "loss": 0.7189, "step": 11138 }, { "epoch": 0.71, "grad_norm": 1.0057953352894007, "learning_rate": 2.0091591498517184e-06, "loss": 0.5953, "step": 11139 }, { "epoch": 0.71, "grad_norm": 1.6179820858616258, "learning_rate": 2.008328548958985e-06, "loss": 0.594, "step": 11140 }, { "epoch": 0.71, "grad_norm": 1.504812582854828, "learning_rate": 2.007498076642005e-06, "loss": 0.6485, "step": 11141 }, { "epoch": 0.71, "grad_norm": 1.7035179216268255, "learning_rate": 2.006667732936469e-06, "loss": 0.694, "step": 11142 }, { "epoch": 0.71, "grad_norm": 1.5695033053051817, "learning_rate": 2.0058375178780644e-06, "loss": 0.689, "step": 11143 }, { "epoch": 0.71, "grad_norm": 1.4705058034725604, "learning_rate": 2.0050074315024716e-06, "loss": 0.6278, "step": 11144 }, { "epoch": 0.71, "grad_norm": 1.830425164673496, "learning_rate": 2.004177473845366e-06, "loss": 0.7367, "step": 11145 }, { "epoch": 0.71, "grad_norm": 1.5527798212030437, "learning_rate": 2.0033476449424156e-06, "loss": 0.5871, "step": 11146 }, { "epoch": 0.71, "grad_norm": 1.6489648845338534, "learning_rate": 2.0025179448292886e-06, "loss": 0.6455, "step": 11147 }, { "epoch": 0.71, "grad_norm": 1.0411532072630707, "learning_rate": 2.0016883735416415e-06, "loss": 0.6875, "step": 11148 }, { "epoch": 0.71, "grad_norm": 1.1556901782713376, "learning_rate": 2.000858931115128e-06, "loss": 0.6439, "step": 11149 }, { "epoch": 0.71, "grad_norm": 2.2249054735102414, "learning_rate": 2.0000296175853956e-06, "loss": 0.6378, "step": 11150 }, { "epoch": 0.71, "grad_norm": 1.5105901967027677, "learning_rate": 1.999200432988086e-06, "loss": 0.6477, "step": 11151 }, { "epoch": 0.71, "grad_norm": 1.3900726685267615, "learning_rate": 1.9983713773588367e-06, "loss": 0.6318, "step": 11152 }, { "epoch": 0.71, "grad_norm": 1.6000411518779682, "learning_rate": 1.997542450733278e-06, "loss": 0.6397, "step": 11153 }, { "epoch": 0.71, "grad_norm": 1.6074706883285328, "learning_rate": 1.9967136531470345e-06, "loss": 0.686, "step": 11154 }, { "epoch": 0.71, "grad_norm": 1.6292400944544363, "learning_rate": 1.9958849846357287e-06, "loss": 0.7237, "step": 11155 }, { "epoch": 0.71, "grad_norm": 1.0872406074384242, "learning_rate": 1.9950564452349733e-06, "loss": 0.6328, "step": 11156 }, { "epoch": 0.71, "grad_norm": 1.4427052348125453, "learning_rate": 1.994228034980378e-06, "loss": 0.714, "step": 11157 }, { "epoch": 0.71, "grad_norm": 1.6087706910002537, "learning_rate": 1.9933997539075468e-06, "loss": 0.6668, "step": 11158 }, { "epoch": 0.71, "grad_norm": 1.536028295677786, "learning_rate": 1.992571602052075e-06, "loss": 0.6199, "step": 11159 }, { "epoch": 0.71, "grad_norm": 1.6099079283175313, "learning_rate": 1.991743579449557e-06, "loss": 0.6398, "step": 11160 }, { "epoch": 0.71, "grad_norm": 1.635813199919835, "learning_rate": 1.9909156861355767e-06, "loss": 0.5959, "step": 11161 }, { "epoch": 0.71, "grad_norm": 1.6873329926634224, "learning_rate": 1.990087922145721e-06, "loss": 0.6773, "step": 11162 }, { "epoch": 0.71, "grad_norm": 1.6901849200902905, "learning_rate": 1.9892602875155582e-06, "loss": 0.6289, "step": 11163 }, { "epoch": 0.71, "grad_norm": 1.3536458249754963, "learning_rate": 1.988432782280663e-06, "loss": 0.6975, "step": 11164 }, { "epoch": 0.71, "grad_norm": 1.6315415324311813, "learning_rate": 1.9876054064765993e-06, "loss": 0.589, "step": 11165 }, { "epoch": 0.71, "grad_norm": 1.6119598938577522, "learning_rate": 1.9867781601389254e-06, "loss": 0.6487, "step": 11166 }, { "epoch": 0.71, "grad_norm": 1.631382173959785, "learning_rate": 1.9859510433031943e-06, "loss": 0.7576, "step": 11167 }, { "epoch": 0.71, "grad_norm": 1.4524273496868145, "learning_rate": 1.9851240560049516e-06, "loss": 0.6852, "step": 11168 }, { "epoch": 0.71, "grad_norm": 1.4296179086557887, "learning_rate": 1.984297198279746e-06, "loss": 0.6846, "step": 11169 }, { "epoch": 0.71, "grad_norm": 1.4579183383904084, "learning_rate": 1.9834704701631063e-06, "loss": 0.6719, "step": 11170 }, { "epoch": 0.72, "grad_norm": 1.4707265700961125, "learning_rate": 1.982643871690571e-06, "loss": 0.6724, "step": 11171 }, { "epoch": 0.72, "grad_norm": 1.177403784323347, "learning_rate": 1.9818174028976576e-06, "loss": 0.6757, "step": 11172 }, { "epoch": 0.72, "grad_norm": 1.4700643511045504, "learning_rate": 1.980991063819893e-06, "loss": 0.681, "step": 11173 }, { "epoch": 0.72, "grad_norm": 2.4785986264921136, "learning_rate": 1.9801648544927876e-06, "loss": 0.595, "step": 11174 }, { "epoch": 0.72, "grad_norm": 1.3709545015003797, "learning_rate": 1.9793387749518517e-06, "loss": 0.5947, "step": 11175 }, { "epoch": 0.72, "grad_norm": 1.5239089842617328, "learning_rate": 1.9785128252325877e-06, "loss": 0.7274, "step": 11176 }, { "epoch": 0.72, "grad_norm": 1.169350909611455, "learning_rate": 1.9776870053704917e-06, "loss": 0.6829, "step": 11177 }, { "epoch": 0.72, "grad_norm": 1.748201517191277, "learning_rate": 1.9768613154010612e-06, "loss": 0.6839, "step": 11178 }, { "epoch": 0.72, "grad_norm": 1.742503932060437, "learning_rate": 1.976035755359775e-06, "loss": 0.7452, "step": 11179 }, { "epoch": 0.72, "grad_norm": 1.102553589225965, "learning_rate": 1.9752103252821202e-06, "loss": 0.6363, "step": 11180 }, { "epoch": 0.72, "grad_norm": 1.5736815469170866, "learning_rate": 1.974385025203569e-06, "loss": 0.7128, "step": 11181 }, { "epoch": 0.72, "grad_norm": 1.5724241207969207, "learning_rate": 1.9735598551595927e-06, "loss": 0.6347, "step": 11182 }, { "epoch": 0.72, "grad_norm": 1.5274015207738456, "learning_rate": 1.9727348151856535e-06, "loss": 0.6493, "step": 11183 }, { "epoch": 0.72, "grad_norm": 1.6477634594591881, "learning_rate": 1.971909905317209e-06, "loss": 0.6864, "step": 11184 }, { "epoch": 0.72, "grad_norm": 2.1375774810418933, "learning_rate": 1.9710851255897173e-06, "loss": 0.7542, "step": 11185 }, { "epoch": 0.72, "grad_norm": 1.4041372851594494, "learning_rate": 1.9702604760386194e-06, "loss": 0.7323, "step": 11186 }, { "epoch": 0.72, "grad_norm": 1.7394906578226703, "learning_rate": 1.969435956699363e-06, "loss": 0.6161, "step": 11187 }, { "epoch": 0.72, "grad_norm": 1.7401934735644635, "learning_rate": 1.9686115676073775e-06, "loss": 0.6553, "step": 11188 }, { "epoch": 0.72, "grad_norm": 1.539303879061253, "learning_rate": 1.967787308798099e-06, "loss": 0.6137, "step": 11189 }, { "epoch": 0.72, "grad_norm": 1.5520287294397421, "learning_rate": 1.9669631803069506e-06, "loss": 0.7239, "step": 11190 }, { "epoch": 0.72, "grad_norm": 1.9289304207130975, "learning_rate": 1.966139182169351e-06, "loss": 0.6601, "step": 11191 }, { "epoch": 0.72, "grad_norm": 1.5166031051236615, "learning_rate": 1.9653153144207144e-06, "loss": 0.6902, "step": 11192 }, { "epoch": 0.72, "grad_norm": 2.033675657668285, "learning_rate": 1.9644915770964472e-06, "loss": 0.628, "step": 11193 }, { "epoch": 0.72, "grad_norm": 1.6631103573280661, "learning_rate": 1.9636679702319566e-06, "loss": 0.6649, "step": 11194 }, { "epoch": 0.72, "grad_norm": 1.7554899868473652, "learning_rate": 1.9628444938626336e-06, "loss": 0.5971, "step": 11195 }, { "epoch": 0.72, "grad_norm": 1.5299979861122361, "learning_rate": 1.9620211480238737e-06, "loss": 0.6525, "step": 11196 }, { "epoch": 0.72, "grad_norm": 1.6863010957904903, "learning_rate": 1.9611979327510617e-06, "loss": 0.6783, "step": 11197 }, { "epoch": 0.72, "grad_norm": 1.5676422229949225, "learning_rate": 1.9603748480795763e-06, "loss": 0.6967, "step": 11198 }, { "epoch": 0.72, "grad_norm": 1.6974957835812214, "learning_rate": 1.9595518940447933e-06, "loss": 0.7343, "step": 11199 }, { "epoch": 0.72, "grad_norm": 1.5663657309104566, "learning_rate": 1.958729070682081e-06, "loss": 0.6426, "step": 11200 }, { "epoch": 0.72, "grad_norm": 1.4891956267994912, "learning_rate": 1.9579063780268026e-06, "loss": 0.7533, "step": 11201 }, { "epoch": 0.72, "grad_norm": 1.46942753255488, "learning_rate": 1.957083816114314e-06, "loss": 0.6256, "step": 11202 }, { "epoch": 0.72, "grad_norm": 1.489478420243967, "learning_rate": 1.9562613849799704e-06, "loss": 0.6571, "step": 11203 }, { "epoch": 0.72, "grad_norm": 1.4122336920375032, "learning_rate": 1.955439084659117e-06, "loss": 0.6412, "step": 11204 }, { "epoch": 0.72, "grad_norm": 1.9449378074788561, "learning_rate": 1.9546169151870943e-06, "loss": 0.7173, "step": 11205 }, { "epoch": 0.72, "grad_norm": 1.6381123541804248, "learning_rate": 1.953794876599237e-06, "loss": 0.6645, "step": 11206 }, { "epoch": 0.72, "grad_norm": 1.539257327792251, "learning_rate": 1.9529729689308756e-06, "loss": 0.5919, "step": 11207 }, { "epoch": 0.72, "grad_norm": 1.6432481782360178, "learning_rate": 1.952151192217333e-06, "loss": 0.6467, "step": 11208 }, { "epoch": 0.72, "grad_norm": 1.9842497521720186, "learning_rate": 1.9513295464939274e-06, "loss": 0.7929, "step": 11209 }, { "epoch": 0.72, "grad_norm": 1.6384677223679138, "learning_rate": 1.9505080317959702e-06, "loss": 0.6297, "step": 11210 }, { "epoch": 0.72, "grad_norm": 1.6559826031654774, "learning_rate": 1.9496866481587717e-06, "loss": 0.6254, "step": 11211 }, { "epoch": 0.72, "grad_norm": 1.432075522514291, "learning_rate": 1.948865395617632e-06, "loss": 0.6859, "step": 11212 }, { "epoch": 0.72, "grad_norm": 1.4650071038330215, "learning_rate": 1.9480442742078455e-06, "loss": 0.6567, "step": 11213 }, { "epoch": 0.72, "grad_norm": 1.3690592721910653, "learning_rate": 1.9472232839647032e-06, "loss": 0.644, "step": 11214 }, { "epoch": 0.72, "grad_norm": 1.5967245456873527, "learning_rate": 1.9464024249234895e-06, "loss": 0.6021, "step": 11215 }, { "epoch": 0.72, "grad_norm": 1.6719382256003212, "learning_rate": 1.9455816971194834e-06, "loss": 0.6721, "step": 11216 }, { "epoch": 0.72, "grad_norm": 1.3614515610681368, "learning_rate": 1.9447611005879573e-06, "loss": 0.6921, "step": 11217 }, { "epoch": 0.72, "grad_norm": 1.5849276271145314, "learning_rate": 1.943940635364179e-06, "loss": 0.7214, "step": 11218 }, { "epoch": 0.72, "grad_norm": 1.6643370504252757, "learning_rate": 1.9431203014834093e-06, "loss": 0.7492, "step": 11219 }, { "epoch": 0.72, "grad_norm": 1.5790507823229454, "learning_rate": 1.942300098980907e-06, "loss": 0.7204, "step": 11220 }, { "epoch": 0.72, "grad_norm": 1.3705577291226452, "learning_rate": 1.9414800278919223e-06, "loss": 0.7225, "step": 11221 }, { "epoch": 0.72, "grad_norm": 1.4857611870476668, "learning_rate": 1.940660088251698e-06, "loss": 0.6302, "step": 11222 }, { "epoch": 0.72, "grad_norm": 2.0148786648091463, "learning_rate": 1.9398402800954746e-06, "loss": 0.8004, "step": 11223 }, { "epoch": 0.72, "grad_norm": 1.9047731465712017, "learning_rate": 1.939020603458486e-06, "loss": 0.7154, "step": 11224 }, { "epoch": 0.72, "grad_norm": 1.5984321958424317, "learning_rate": 1.9382010583759604e-06, "loss": 0.6929, "step": 11225 }, { "epoch": 0.72, "grad_norm": 1.4592634860764035, "learning_rate": 1.937381644883117e-06, "loss": 0.6527, "step": 11226 }, { "epoch": 0.72, "grad_norm": 1.6425694098219807, "learning_rate": 1.936562363015179e-06, "loss": 0.6145, "step": 11227 }, { "epoch": 0.72, "grad_norm": 1.992820295921451, "learning_rate": 1.93574321280735e-06, "loss": 0.7262, "step": 11228 }, { "epoch": 0.72, "grad_norm": 1.4539870607113612, "learning_rate": 1.9349241942948405e-06, "loss": 0.6926, "step": 11229 }, { "epoch": 0.72, "grad_norm": 1.816384493719138, "learning_rate": 1.934105307512848e-06, "loss": 0.7089, "step": 11230 }, { "epoch": 0.72, "grad_norm": 1.6440327451900505, "learning_rate": 1.9332865524965677e-06, "loss": 0.6269, "step": 11231 }, { "epoch": 0.72, "grad_norm": 1.625170380896999, "learning_rate": 1.932467929281187e-06, "loss": 0.6419, "step": 11232 }, { "epoch": 0.72, "grad_norm": 1.806104423845855, "learning_rate": 1.9316494379018876e-06, "loss": 0.727, "step": 11233 }, { "epoch": 0.72, "grad_norm": 1.6952418346472171, "learning_rate": 1.9308310783938505e-06, "loss": 0.7141, "step": 11234 }, { "epoch": 0.72, "grad_norm": 1.5415426146986169, "learning_rate": 1.9300128507922417e-06, "loss": 0.5795, "step": 11235 }, { "epoch": 0.72, "grad_norm": 1.655992985692183, "learning_rate": 1.9291947551322327e-06, "loss": 0.7017, "step": 11236 }, { "epoch": 0.72, "grad_norm": 1.4473486397183002, "learning_rate": 1.9283767914489777e-06, "loss": 0.658, "step": 11237 }, { "epoch": 0.72, "grad_norm": 1.1781686949413595, "learning_rate": 1.9275589597776346e-06, "loss": 0.7169, "step": 11238 }, { "epoch": 0.72, "grad_norm": 1.5227463587044834, "learning_rate": 1.926741260153352e-06, "loss": 0.7043, "step": 11239 }, { "epoch": 0.72, "grad_norm": 1.485027114654008, "learning_rate": 1.9259236926112702e-06, "loss": 0.6546, "step": 11240 }, { "epoch": 0.72, "grad_norm": 1.6146096482617793, "learning_rate": 1.925106257186532e-06, "loss": 0.7504, "step": 11241 }, { "epoch": 0.72, "grad_norm": 1.478915127400472, "learning_rate": 1.9242889539142624e-06, "loss": 0.5838, "step": 11242 }, { "epoch": 0.72, "grad_norm": 1.6625662402111925, "learning_rate": 1.923471782829594e-06, "loss": 0.7003, "step": 11243 }, { "epoch": 0.72, "grad_norm": 1.761010583720394, "learning_rate": 1.9226547439676404e-06, "loss": 0.7736, "step": 11244 }, { "epoch": 0.72, "grad_norm": 3.214485445765412, "learning_rate": 1.921837837363521e-06, "loss": 0.6155, "step": 11245 }, { "epoch": 0.72, "grad_norm": 1.5700427263499777, "learning_rate": 1.921021063052343e-06, "loss": 0.6282, "step": 11246 }, { "epoch": 0.72, "grad_norm": 1.5659496138700593, "learning_rate": 1.92020442106921e-06, "loss": 0.7454, "step": 11247 }, { "epoch": 0.72, "grad_norm": 1.0905411029489738, "learning_rate": 1.9193879114492198e-06, "loss": 0.6188, "step": 11248 }, { "epoch": 0.72, "grad_norm": 1.5320947151003905, "learning_rate": 1.918571534227462e-06, "loss": 0.6586, "step": 11249 }, { "epoch": 0.72, "grad_norm": 1.5766290371307587, "learning_rate": 1.917755289439028e-06, "loss": 0.6436, "step": 11250 }, { "epoch": 0.72, "grad_norm": 1.5117371712090375, "learning_rate": 1.9169391771189915e-06, "loss": 0.7007, "step": 11251 }, { "epoch": 0.72, "grad_norm": 1.2386048644595344, "learning_rate": 1.916123197302433e-06, "loss": 0.7011, "step": 11252 }, { "epoch": 0.72, "grad_norm": 1.4236763534797707, "learning_rate": 1.915307350024419e-06, "loss": 0.7463, "step": 11253 }, { "epoch": 0.72, "grad_norm": 1.7148076012311013, "learning_rate": 1.914491635320013e-06, "loss": 0.6545, "step": 11254 }, { "epoch": 0.72, "grad_norm": 2.2148227959719415, "learning_rate": 1.913676053224273e-06, "loss": 0.6576, "step": 11255 }, { "epoch": 0.72, "grad_norm": 1.498300270394339, "learning_rate": 1.9128606037722512e-06, "loss": 0.5913, "step": 11256 }, { "epoch": 0.72, "grad_norm": 1.7475874996118623, "learning_rate": 1.9120452869989943e-06, "loss": 0.7793, "step": 11257 }, { "epoch": 0.72, "grad_norm": 1.5414625804805637, "learning_rate": 1.9112301029395397e-06, "loss": 0.6493, "step": 11258 }, { "epoch": 0.72, "grad_norm": 1.7485474412009656, "learning_rate": 1.9104150516289283e-06, "loss": 0.6809, "step": 11259 }, { "epoch": 0.72, "grad_norm": 1.4688207337313706, "learning_rate": 1.909600133102183e-06, "loss": 0.5739, "step": 11260 }, { "epoch": 0.72, "grad_norm": 1.396687174351747, "learning_rate": 1.9087853473943313e-06, "loss": 0.6091, "step": 11261 }, { "epoch": 0.72, "grad_norm": 1.7664606620126806, "learning_rate": 1.9079706945403905e-06, "loss": 0.7166, "step": 11262 }, { "epoch": 0.72, "grad_norm": 1.8219434491220217, "learning_rate": 1.9071561745753715e-06, "loss": 0.725, "step": 11263 }, { "epoch": 0.72, "grad_norm": 1.3258993450868704, "learning_rate": 1.906341787534281e-06, "loss": 0.6306, "step": 11264 }, { "epoch": 0.72, "grad_norm": 1.5124042264902813, "learning_rate": 1.90552753345212e-06, "loss": 0.6066, "step": 11265 }, { "epoch": 0.72, "grad_norm": 1.4912938155715123, "learning_rate": 1.9047134123638833e-06, "loss": 0.6788, "step": 11266 }, { "epoch": 0.72, "grad_norm": 1.278401936534062, "learning_rate": 1.9038994243045582e-06, "loss": 0.7361, "step": 11267 }, { "epoch": 0.72, "grad_norm": 1.1964272532090525, "learning_rate": 1.9030855693091316e-06, "loss": 0.7282, "step": 11268 }, { "epoch": 0.72, "grad_norm": 1.5850815530448867, "learning_rate": 1.90227184741258e-06, "loss": 0.6425, "step": 11269 }, { "epoch": 0.72, "grad_norm": 1.485721993495984, "learning_rate": 1.9014582586498754e-06, "loss": 0.7016, "step": 11270 }, { "epoch": 0.72, "grad_norm": 1.7450766599594387, "learning_rate": 1.9006448030559832e-06, "loss": 0.648, "step": 11271 }, { "epoch": 0.72, "grad_norm": 1.6284050604267382, "learning_rate": 1.8998314806658652e-06, "loss": 0.6809, "step": 11272 }, { "epoch": 0.72, "grad_norm": 1.4714586416244329, "learning_rate": 1.899018291514476e-06, "loss": 0.5512, "step": 11273 }, { "epoch": 0.72, "grad_norm": 1.5323664133807724, "learning_rate": 1.8982052356367641e-06, "loss": 0.6984, "step": 11274 }, { "epoch": 0.72, "grad_norm": 1.22135242212641, "learning_rate": 1.897392313067672e-06, "loss": 0.6181, "step": 11275 }, { "epoch": 0.72, "grad_norm": 1.8824596489707168, "learning_rate": 1.8965795238421408e-06, "loss": 0.6399, "step": 11276 }, { "epoch": 0.72, "grad_norm": 1.5855652596380212, "learning_rate": 1.8957668679950997e-06, "loss": 0.6625, "step": 11277 }, { "epoch": 0.72, "grad_norm": 1.6512369730970113, "learning_rate": 1.8949543455614767e-06, "loss": 0.6541, "step": 11278 }, { "epoch": 0.72, "grad_norm": 1.4155562087742546, "learning_rate": 1.8941419565761903e-06, "loss": 0.5906, "step": 11279 }, { "epoch": 0.72, "grad_norm": 1.3946353949772219, "learning_rate": 1.8933297010741569e-06, "loss": 0.6746, "step": 11280 }, { "epoch": 0.72, "grad_norm": 1.8239554501217874, "learning_rate": 1.892517579090285e-06, "loss": 0.7671, "step": 11281 }, { "epoch": 0.72, "grad_norm": 1.0337044573052303, "learning_rate": 1.8917055906594755e-06, "loss": 0.608, "step": 11282 }, { "epoch": 0.72, "grad_norm": 1.651900650919498, "learning_rate": 1.8908937358166323e-06, "loss": 0.7399, "step": 11283 }, { "epoch": 0.72, "grad_norm": 1.7083070740736779, "learning_rate": 1.8900820145966397e-06, "loss": 0.6272, "step": 11284 }, { "epoch": 0.72, "grad_norm": 1.4921917029033975, "learning_rate": 1.8892704270343887e-06, "loss": 0.7647, "step": 11285 }, { "epoch": 0.72, "grad_norm": 1.8122746306063142, "learning_rate": 1.8884589731647584e-06, "loss": 0.6495, "step": 11286 }, { "epoch": 0.72, "grad_norm": 1.6590171181179856, "learning_rate": 1.8876476530226235e-06, "loss": 0.7368, "step": 11287 }, { "epoch": 0.72, "grad_norm": 1.6537343368123871, "learning_rate": 1.8868364666428523e-06, "loss": 0.7399, "step": 11288 }, { "epoch": 0.72, "grad_norm": 1.121860819283743, "learning_rate": 1.8860254140603063e-06, "loss": 0.6376, "step": 11289 }, { "epoch": 0.72, "grad_norm": 1.8157525284908989, "learning_rate": 1.8852144953098478e-06, "loss": 0.7123, "step": 11290 }, { "epoch": 0.72, "grad_norm": 1.2208467630190358, "learning_rate": 1.8844037104263225e-06, "loss": 0.5712, "step": 11291 }, { "epoch": 0.72, "grad_norm": 1.1246438616125738, "learning_rate": 1.8835930594445817e-06, "loss": 0.6509, "step": 11292 }, { "epoch": 0.72, "grad_norm": 1.561779765126918, "learning_rate": 1.8827825423994595e-06, "loss": 0.601, "step": 11293 }, { "epoch": 0.72, "grad_norm": 1.608984018649165, "learning_rate": 1.881972159325795e-06, "loss": 0.7342, "step": 11294 }, { "epoch": 0.72, "grad_norm": 1.4986267082948757, "learning_rate": 1.8811619102584155e-06, "loss": 0.6144, "step": 11295 }, { "epoch": 0.72, "grad_norm": 1.6146257224658098, "learning_rate": 1.8803517952321438e-06, "loss": 0.6933, "step": 11296 }, { "epoch": 0.72, "grad_norm": 1.7208802029857233, "learning_rate": 1.8795418142817962e-06, "loss": 0.62, "step": 11297 }, { "epoch": 0.72, "grad_norm": 1.7638918824997807, "learning_rate": 1.8787319674421827e-06, "loss": 0.6685, "step": 11298 }, { "epoch": 0.72, "grad_norm": 1.5855773401154463, "learning_rate": 1.877922254748114e-06, "loss": 0.7006, "step": 11299 }, { "epoch": 0.72, "grad_norm": 1.3867150015021381, "learning_rate": 1.8771126762343834e-06, "loss": 0.755, "step": 11300 }, { "epoch": 0.72, "grad_norm": 1.566612337065227, "learning_rate": 1.876303231935791e-06, "loss": 0.6237, "step": 11301 }, { "epoch": 0.72, "grad_norm": 1.417690842315997, "learning_rate": 1.8754939218871183e-06, "loss": 0.5851, "step": 11302 }, { "epoch": 0.72, "grad_norm": 1.401623069855573, "learning_rate": 1.8746847461231533e-06, "loss": 0.6232, "step": 11303 }, { "epoch": 0.72, "grad_norm": 1.3885077319080945, "learning_rate": 1.8738757046786705e-06, "loss": 0.6233, "step": 11304 }, { "epoch": 0.72, "grad_norm": 1.6322352169189778, "learning_rate": 1.8730667975884398e-06, "loss": 0.6161, "step": 11305 }, { "epoch": 0.72, "grad_norm": 1.587296797315521, "learning_rate": 1.8722580248872302e-06, "loss": 0.6133, "step": 11306 }, { "epoch": 0.72, "grad_norm": 1.4156072442846632, "learning_rate": 1.8714493866097955e-06, "loss": 0.4969, "step": 11307 }, { "epoch": 0.72, "grad_norm": 1.7639012336778324, "learning_rate": 1.8706408827908956e-06, "loss": 0.6897, "step": 11308 }, { "epoch": 0.72, "grad_norm": 1.6361289226858278, "learning_rate": 1.8698325134652711e-06, "loss": 0.6709, "step": 11309 }, { "epoch": 0.72, "grad_norm": 1.3832560411275212, "learning_rate": 1.86902427866767e-06, "loss": 0.6194, "step": 11310 }, { "epoch": 0.72, "grad_norm": 1.692412573958005, "learning_rate": 1.8682161784328262e-06, "loss": 0.6599, "step": 11311 }, { "epoch": 0.72, "grad_norm": 1.6402102353163939, "learning_rate": 1.8674082127954684e-06, "loss": 0.6163, "step": 11312 }, { "epoch": 0.72, "grad_norm": 1.5263184873293651, "learning_rate": 1.8666003817903267e-06, "loss": 0.6119, "step": 11313 }, { "epoch": 0.72, "grad_norm": 1.5380030082794198, "learning_rate": 1.8657926854521125e-06, "loss": 0.7476, "step": 11314 }, { "epoch": 0.72, "grad_norm": 1.0434651579861876, "learning_rate": 1.8649851238155465e-06, "loss": 0.733, "step": 11315 }, { "epoch": 0.72, "grad_norm": 1.408219728982806, "learning_rate": 1.864177696915329e-06, "loss": 0.6079, "step": 11316 }, { "epoch": 0.72, "grad_norm": 1.4256963533916707, "learning_rate": 1.8633704047861667e-06, "loss": 0.6448, "step": 11317 }, { "epoch": 0.72, "grad_norm": 1.6617893792250131, "learning_rate": 1.862563247462753e-06, "loss": 0.6385, "step": 11318 }, { "epoch": 0.72, "grad_norm": 1.4527239315321832, "learning_rate": 1.8617562249797788e-06, "loss": 0.6992, "step": 11319 }, { "epoch": 0.72, "grad_norm": 1.3662141916164894, "learning_rate": 1.8609493373719273e-06, "loss": 0.5915, "step": 11320 }, { "epoch": 0.72, "grad_norm": 1.4202537425592827, "learning_rate": 1.8601425846738775e-06, "loss": 0.5553, "step": 11321 }, { "epoch": 0.72, "grad_norm": 1.5658908821857898, "learning_rate": 1.859335966920301e-06, "loss": 0.6812, "step": 11322 }, { "epoch": 0.72, "grad_norm": 1.7964428750975918, "learning_rate": 1.858529484145864e-06, "loss": 0.7618, "step": 11323 }, { "epoch": 0.72, "grad_norm": 1.2177299393151506, "learning_rate": 1.8577231363852305e-06, "loss": 0.7462, "step": 11324 }, { "epoch": 0.72, "grad_norm": 1.7455586377096952, "learning_rate": 1.8569169236730533e-06, "loss": 0.6866, "step": 11325 }, { "epoch": 0.72, "grad_norm": 1.4755151316046118, "learning_rate": 1.8561108460439825e-06, "loss": 0.6553, "step": 11326 }, { "epoch": 0.72, "grad_norm": 1.6556222070839872, "learning_rate": 1.8553049035326615e-06, "loss": 0.5963, "step": 11327 }, { "epoch": 0.73, "grad_norm": 1.5477822648672457, "learning_rate": 1.8544990961737274e-06, "loss": 0.6477, "step": 11328 }, { "epoch": 0.73, "grad_norm": 1.5998799627054978, "learning_rate": 1.8536934240018129e-06, "loss": 0.6838, "step": 11329 }, { "epoch": 0.73, "grad_norm": 1.8286321859550918, "learning_rate": 1.8528878870515433e-06, "loss": 0.6172, "step": 11330 }, { "epoch": 0.73, "grad_norm": 1.606266365096482, "learning_rate": 1.852082485357538e-06, "loss": 0.7614, "step": 11331 }, { "epoch": 0.73, "grad_norm": 1.4338274631060937, "learning_rate": 1.8512772189544142e-06, "loss": 0.6152, "step": 11332 }, { "epoch": 0.73, "grad_norm": 1.465906028606371, "learning_rate": 1.8504720878767797e-06, "loss": 0.6354, "step": 11333 }, { "epoch": 0.73, "grad_norm": 0.9846725032770309, "learning_rate": 1.8496670921592364e-06, "loss": 0.684, "step": 11334 }, { "epoch": 0.73, "grad_norm": 1.823903795218896, "learning_rate": 1.8488622318363814e-06, "loss": 0.526, "step": 11335 }, { "epoch": 0.73, "grad_norm": 1.5543091045575061, "learning_rate": 1.848057506942807e-06, "loss": 0.6896, "step": 11336 }, { "epoch": 0.73, "grad_norm": 1.826083614933995, "learning_rate": 1.847252917513097e-06, "loss": 0.7299, "step": 11337 }, { "epoch": 0.73, "grad_norm": 2.2472361505845924, "learning_rate": 1.8464484635818326e-06, "loss": 0.606, "step": 11338 }, { "epoch": 0.73, "grad_norm": 1.508857425017206, "learning_rate": 1.845644145183586e-06, "loss": 0.6044, "step": 11339 }, { "epoch": 0.73, "grad_norm": 1.5581146821942822, "learning_rate": 1.8448399623529246e-06, "loss": 0.6088, "step": 11340 }, { "epoch": 0.73, "grad_norm": 1.6787886267553578, "learning_rate": 1.844035915124413e-06, "loss": 0.7101, "step": 11341 }, { "epoch": 0.73, "grad_norm": 1.6710615622732201, "learning_rate": 1.8432320035326062e-06, "loss": 0.652, "step": 11342 }, { "epoch": 0.73, "grad_norm": 1.7247589694507037, "learning_rate": 1.8424282276120547e-06, "loss": 0.6463, "step": 11343 }, { "epoch": 0.73, "grad_norm": 1.0914621989902606, "learning_rate": 1.8416245873973031e-06, "loss": 0.6486, "step": 11344 }, { "epoch": 0.73, "grad_norm": 1.8077575497404403, "learning_rate": 1.84082108292289e-06, "loss": 0.5793, "step": 11345 }, { "epoch": 0.73, "grad_norm": 1.7145522353747529, "learning_rate": 1.8400177142233489e-06, "loss": 0.6623, "step": 11346 }, { "epoch": 0.73, "grad_norm": 1.8848469842872904, "learning_rate": 1.8392144813332041e-06, "loss": 0.6277, "step": 11347 }, { "epoch": 0.73, "grad_norm": 1.881189993061084, "learning_rate": 1.8384113842869827e-06, "loss": 0.6324, "step": 11348 }, { "epoch": 0.73, "grad_norm": 1.3954859964158504, "learning_rate": 1.8376084231191932e-06, "loss": 0.5892, "step": 11349 }, { "epoch": 0.73, "grad_norm": 1.96319472679776, "learning_rate": 1.8368055978643501e-06, "loss": 0.6179, "step": 11350 }, { "epoch": 0.73, "grad_norm": 1.6271122241401315, "learning_rate": 1.8360029085569558e-06, "loss": 0.7388, "step": 11351 }, { "epoch": 0.73, "grad_norm": 1.2944561300708333, "learning_rate": 1.8352003552315078e-06, "loss": 0.7616, "step": 11352 }, { "epoch": 0.73, "grad_norm": 1.6096654458489172, "learning_rate": 1.8343979379224991e-06, "loss": 0.7937, "step": 11353 }, { "epoch": 0.73, "grad_norm": 2.2226874786748994, "learning_rate": 1.8335956566644125e-06, "loss": 0.5829, "step": 11354 }, { "epoch": 0.73, "grad_norm": 1.4264066143277847, "learning_rate": 1.832793511491735e-06, "loss": 0.6067, "step": 11355 }, { "epoch": 0.73, "grad_norm": 1.4233292639497193, "learning_rate": 1.831991502438934e-06, "loss": 0.6575, "step": 11356 }, { "epoch": 0.73, "grad_norm": 1.5953453304695695, "learning_rate": 1.831189629540484e-06, "loss": 0.6359, "step": 11357 }, { "epoch": 0.73, "grad_norm": 1.6459924958241214, "learning_rate": 1.8303878928308421e-06, "loss": 0.6692, "step": 11358 }, { "epoch": 0.73, "grad_norm": 1.6230888706885793, "learning_rate": 1.8295862923444702e-06, "loss": 0.7608, "step": 11359 }, { "epoch": 0.73, "grad_norm": 1.8196442618961126, "learning_rate": 1.8287848281158178e-06, "loss": 0.6833, "step": 11360 }, { "epoch": 0.73, "grad_norm": 1.436783671751686, "learning_rate": 1.8279835001793272e-06, "loss": 0.6805, "step": 11361 }, { "epoch": 0.73, "grad_norm": 2.27384846877819, "learning_rate": 1.8271823085694446e-06, "loss": 0.6641, "step": 11362 }, { "epoch": 0.73, "grad_norm": 1.0239921538211412, "learning_rate": 1.8263812533205955e-06, "loss": 0.6375, "step": 11363 }, { "epoch": 0.73, "grad_norm": 1.441819770087725, "learning_rate": 1.825580334467215e-06, "loss": 0.7009, "step": 11364 }, { "epoch": 0.73, "grad_norm": 1.0287054580230566, "learning_rate": 1.8247795520437177e-06, "loss": 0.6929, "step": 11365 }, { "epoch": 0.73, "grad_norm": 1.1907075119105421, "learning_rate": 1.8239789060845254e-06, "loss": 0.6342, "step": 11366 }, { "epoch": 0.73, "grad_norm": 0.998221493176142, "learning_rate": 1.8231783966240458e-06, "loss": 0.7096, "step": 11367 }, { "epoch": 0.73, "grad_norm": 1.4788635608527567, "learning_rate": 1.8223780236966832e-06, "loss": 0.6867, "step": 11368 }, { "epoch": 0.73, "grad_norm": 1.9496280891918998, "learning_rate": 1.8215777873368363e-06, "loss": 0.6147, "step": 11369 }, { "epoch": 0.73, "grad_norm": 1.7500613625134867, "learning_rate": 1.8207776875788952e-06, "loss": 0.6726, "step": 11370 }, { "epoch": 0.73, "grad_norm": 1.4743838041675141, "learning_rate": 1.8199777244572525e-06, "loss": 0.6208, "step": 11371 }, { "epoch": 0.73, "grad_norm": 1.8968106732137664, "learning_rate": 1.8191778980062813e-06, "loss": 0.7689, "step": 11372 }, { "epoch": 0.73, "grad_norm": 1.5681514619617891, "learning_rate": 1.8183782082603618e-06, "loss": 0.7092, "step": 11373 }, { "epoch": 0.73, "grad_norm": 1.5735926937710423, "learning_rate": 1.8175786552538616e-06, "loss": 0.7118, "step": 11374 }, { "epoch": 0.73, "grad_norm": 1.6392258772911854, "learning_rate": 1.8167792390211435e-06, "loss": 0.6023, "step": 11375 }, { "epoch": 0.73, "grad_norm": 1.6795226387417417, "learning_rate": 1.815979959596565e-06, "loss": 0.8094, "step": 11376 }, { "epoch": 0.73, "grad_norm": 2.0903682127539533, "learning_rate": 1.8151808170144751e-06, "loss": 0.6003, "step": 11377 }, { "epoch": 0.73, "grad_norm": 1.6844667380408689, "learning_rate": 1.8143818113092254e-06, "loss": 0.6999, "step": 11378 }, { "epoch": 0.73, "grad_norm": 1.553795948670651, "learning_rate": 1.813582942515148e-06, "loss": 0.7185, "step": 11379 }, { "epoch": 0.73, "grad_norm": 1.9058763237001883, "learning_rate": 1.8127842106665837e-06, "loss": 0.6691, "step": 11380 }, { "epoch": 0.73, "grad_norm": 1.053662131422342, "learning_rate": 1.8119856157978534e-06, "loss": 0.593, "step": 11381 }, { "epoch": 0.73, "grad_norm": 1.7401771381692512, "learning_rate": 1.8111871579432839e-06, "loss": 0.6925, "step": 11382 }, { "epoch": 0.73, "grad_norm": 1.4038408566340508, "learning_rate": 1.8103888371371898e-06, "loss": 0.6982, "step": 11383 }, { "epoch": 0.73, "grad_norm": 1.4541903034313644, "learning_rate": 1.8095906534138813e-06, "loss": 0.6592, "step": 11384 }, { "epoch": 0.73, "grad_norm": 1.546350360660487, "learning_rate": 1.8087926068076622e-06, "loss": 0.6698, "step": 11385 }, { "epoch": 0.73, "grad_norm": 1.4292181666755677, "learning_rate": 1.8079946973528295e-06, "loss": 0.6451, "step": 11386 }, { "epoch": 0.73, "grad_norm": 1.463674317984074, "learning_rate": 1.8071969250836813e-06, "loss": 0.6617, "step": 11387 }, { "epoch": 0.73, "grad_norm": 1.5074476586800687, "learning_rate": 1.8063992900344962e-06, "loss": 0.6316, "step": 11388 }, { "epoch": 0.73, "grad_norm": 1.7722684882563136, "learning_rate": 1.8056017922395607e-06, "loss": 0.7219, "step": 11389 }, { "epoch": 0.73, "grad_norm": 1.704139185428389, "learning_rate": 1.804804431733148e-06, "loss": 0.7254, "step": 11390 }, { "epoch": 0.73, "grad_norm": 1.6009522258609252, "learning_rate": 1.8040072085495276e-06, "loss": 0.7231, "step": 11391 }, { "epoch": 0.73, "grad_norm": 2.8918986645184366, "learning_rate": 1.8032101227229604e-06, "loss": 0.6921, "step": 11392 }, { "epoch": 0.73, "grad_norm": 1.7539099441130812, "learning_rate": 1.8024131742877054e-06, "loss": 0.6169, "step": 11393 }, { "epoch": 0.73, "grad_norm": 1.4379126419285913, "learning_rate": 1.8016163632780132e-06, "loss": 0.5748, "step": 11394 }, { "epoch": 0.73, "grad_norm": 1.6364741708034352, "learning_rate": 1.8008196897281287e-06, "loss": 0.6492, "step": 11395 }, { "epoch": 0.73, "grad_norm": 1.1800761782274034, "learning_rate": 1.8000231536722896e-06, "loss": 0.6738, "step": 11396 }, { "epoch": 0.73, "grad_norm": 1.6593369027172584, "learning_rate": 1.7992267551447334e-06, "loss": 0.5848, "step": 11397 }, { "epoch": 0.73, "grad_norm": 1.8341826480626442, "learning_rate": 1.7984304941796854e-06, "loss": 0.7213, "step": 11398 }, { "epoch": 0.73, "grad_norm": 1.0969616816632024, "learning_rate": 1.7976343708113675e-06, "loss": 0.625, "step": 11399 }, { "epoch": 0.73, "grad_norm": 1.6763199278988217, "learning_rate": 1.796838385073995e-06, "loss": 0.6404, "step": 11400 }, { "epoch": 0.73, "grad_norm": 1.0889123437695287, "learning_rate": 1.7960425370017782e-06, "loss": 0.6015, "step": 11401 }, { "epoch": 0.73, "grad_norm": 1.707753555583152, "learning_rate": 1.7952468266289203e-06, "loss": 0.6562, "step": 11402 }, { "epoch": 0.73, "grad_norm": 1.5143261105856511, "learning_rate": 1.794451253989618e-06, "loss": 0.658, "step": 11403 }, { "epoch": 0.73, "grad_norm": 1.4170496932372532, "learning_rate": 1.7936558191180686e-06, "loss": 0.568, "step": 11404 }, { "epoch": 0.73, "grad_norm": 1.6209045762176952, "learning_rate": 1.7928605220484513e-06, "loss": 0.6874, "step": 11405 }, { "epoch": 0.73, "grad_norm": 1.4246040844229133, "learning_rate": 1.7920653628149515e-06, "loss": 0.6614, "step": 11406 }, { "epoch": 0.73, "grad_norm": 1.7068939616945853, "learning_rate": 1.7912703414517413e-06, "loss": 0.7077, "step": 11407 }, { "epoch": 0.73, "grad_norm": 1.8275659136580733, "learning_rate": 1.7904754579929895e-06, "loss": 0.7445, "step": 11408 }, { "epoch": 0.73, "grad_norm": 1.799063600054732, "learning_rate": 1.7896807124728582e-06, "loss": 0.681, "step": 11409 }, { "epoch": 0.73, "grad_norm": 1.5426265511321071, "learning_rate": 1.7888861049255041e-06, "loss": 0.7558, "step": 11410 }, { "epoch": 0.73, "grad_norm": 1.6686482557973572, "learning_rate": 1.788091635385078e-06, "loss": 0.651, "step": 11411 }, { "epoch": 0.73, "grad_norm": 1.617126441801938, "learning_rate": 1.7872973038857221e-06, "loss": 0.691, "step": 11412 }, { "epoch": 0.73, "grad_norm": 1.8976802631216287, "learning_rate": 1.7865031104615809e-06, "loss": 0.724, "step": 11413 }, { "epoch": 0.73, "grad_norm": 1.5271777744480215, "learning_rate": 1.7857090551467805e-06, "loss": 0.5456, "step": 11414 }, { "epoch": 0.73, "grad_norm": 1.5465874424872097, "learning_rate": 1.784915137975452e-06, "loss": 0.6443, "step": 11415 }, { "epoch": 0.73, "grad_norm": 1.0804685477360365, "learning_rate": 1.7841213589817152e-06, "loss": 0.6664, "step": 11416 }, { "epoch": 0.73, "grad_norm": 1.4574131975686704, "learning_rate": 1.783327718199685e-06, "loss": 0.6435, "step": 11417 }, { "epoch": 0.73, "grad_norm": 5.240572804986611, "learning_rate": 1.7825342156634701e-06, "loss": 0.6541, "step": 11418 }, { "epoch": 0.73, "grad_norm": 1.6123077708150046, "learning_rate": 1.7817408514071722e-06, "loss": 0.6858, "step": 11419 }, { "epoch": 0.73, "grad_norm": 1.6272074115393746, "learning_rate": 1.7809476254648932e-06, "loss": 0.67, "step": 11420 }, { "epoch": 0.73, "grad_norm": 1.7453352134184024, "learning_rate": 1.780154537870718e-06, "loss": 0.5971, "step": 11421 }, { "epoch": 0.73, "grad_norm": 0.9724839764362624, "learning_rate": 1.7793615886587384e-06, "loss": 0.6269, "step": 11422 }, { "epoch": 0.73, "grad_norm": 1.8622324378345017, "learning_rate": 1.7785687778630268e-06, "loss": 0.6728, "step": 11423 }, { "epoch": 0.73, "grad_norm": 1.5500082529536583, "learning_rate": 1.7777761055176612e-06, "loss": 0.6191, "step": 11424 }, { "epoch": 0.73, "grad_norm": 1.7816194182626455, "learning_rate": 1.776983571656708e-06, "loss": 0.7083, "step": 11425 }, { "epoch": 0.73, "grad_norm": 2.0116296118641057, "learning_rate": 1.776191176314227e-06, "loss": 0.6851, "step": 11426 }, { "epoch": 0.73, "grad_norm": 1.6354106062707683, "learning_rate": 1.7753989195242782e-06, "loss": 0.7025, "step": 11427 }, { "epoch": 0.73, "grad_norm": 1.5121072981024353, "learning_rate": 1.7746068013209045e-06, "loss": 0.6682, "step": 11428 }, { "epoch": 0.73, "grad_norm": 1.4622858880896124, "learning_rate": 1.7738148217381568e-06, "loss": 0.597, "step": 11429 }, { "epoch": 0.73, "grad_norm": 1.434085841816764, "learning_rate": 1.7730229808100652e-06, "loss": 0.6853, "step": 11430 }, { "epoch": 0.73, "grad_norm": 1.5023771972176336, "learning_rate": 1.772231278570667e-06, "loss": 0.6842, "step": 11431 }, { "epoch": 0.73, "grad_norm": 1.206509481553044, "learning_rate": 1.7714397150539853e-06, "loss": 0.7516, "step": 11432 }, { "epoch": 0.73, "grad_norm": 1.8212412217881826, "learning_rate": 1.7706482902940397e-06, "loss": 0.6112, "step": 11433 }, { "epoch": 0.73, "grad_norm": 1.6287083646703753, "learning_rate": 1.7698570043248476e-06, "loss": 0.7281, "step": 11434 }, { "epoch": 0.73, "grad_norm": 1.3427628352712102, "learning_rate": 1.7690658571804109e-06, "loss": 0.7248, "step": 11435 }, { "epoch": 0.73, "grad_norm": 1.3976182474713523, "learning_rate": 1.7682748488947377e-06, "loss": 0.6639, "step": 11436 }, { "epoch": 0.73, "grad_norm": 1.4967972518251025, "learning_rate": 1.7674839795018173e-06, "loss": 0.6743, "step": 11437 }, { "epoch": 0.73, "grad_norm": 1.7951453579193997, "learning_rate": 1.7666932490356448e-06, "loss": 0.7064, "step": 11438 }, { "epoch": 0.73, "grad_norm": 1.0637886134794061, "learning_rate": 1.7659026575302025e-06, "loss": 0.7389, "step": 11439 }, { "epoch": 0.73, "grad_norm": 1.6799544918768123, "learning_rate": 1.765112205019468e-06, "loss": 0.6687, "step": 11440 }, { "epoch": 0.73, "grad_norm": 1.5332940376429776, "learning_rate": 1.764321891537414e-06, "loss": 0.6768, "step": 11441 }, { "epoch": 0.73, "grad_norm": 1.4449201142266843, "learning_rate": 1.7635317171180032e-06, "loss": 0.5894, "step": 11442 }, { "epoch": 0.73, "grad_norm": 1.5889597042454455, "learning_rate": 1.7627416817952032e-06, "loss": 0.7572, "step": 11443 }, { "epoch": 0.73, "grad_norm": 2.3137045222407755, "learning_rate": 1.7619517856029589e-06, "loss": 0.7159, "step": 11444 }, { "epoch": 0.73, "grad_norm": 1.9709432628687353, "learning_rate": 1.7611620285752246e-06, "loss": 0.6789, "step": 11445 }, { "epoch": 0.73, "grad_norm": 1.4459216852618875, "learning_rate": 1.7603724107459408e-06, "loss": 0.6035, "step": 11446 }, { "epoch": 0.73, "grad_norm": 1.5228775910465209, "learning_rate": 1.7595829321490437e-06, "loss": 0.6652, "step": 11447 }, { "epoch": 0.73, "grad_norm": 1.788985310961339, "learning_rate": 1.7587935928184624e-06, "loss": 0.6321, "step": 11448 }, { "epoch": 0.73, "grad_norm": 1.212212811433954, "learning_rate": 1.7580043927881224e-06, "loss": 0.612, "step": 11449 }, { "epoch": 0.73, "grad_norm": 1.6657363390849633, "learning_rate": 1.757215332091941e-06, "loss": 0.6715, "step": 11450 }, { "epoch": 0.73, "grad_norm": 1.7675717325644138, "learning_rate": 1.756426410763829e-06, "loss": 0.6437, "step": 11451 }, { "epoch": 0.73, "grad_norm": 1.2928301828489628, "learning_rate": 1.755637628837697e-06, "loss": 0.7157, "step": 11452 }, { "epoch": 0.73, "grad_norm": 1.5133782678160896, "learning_rate": 1.7548489863474393e-06, "loss": 0.7011, "step": 11453 }, { "epoch": 0.73, "grad_norm": 1.620489508790733, "learning_rate": 1.7540604833269553e-06, "loss": 0.6105, "step": 11454 }, { "epoch": 0.73, "grad_norm": 1.533841536008685, "learning_rate": 1.753272119810131e-06, "loss": 0.6113, "step": 11455 }, { "epoch": 0.73, "grad_norm": 1.8226111462122772, "learning_rate": 1.7524838958308481e-06, "loss": 0.6326, "step": 11456 }, { "epoch": 0.73, "grad_norm": 1.567384115905898, "learning_rate": 1.7516958114229837e-06, "loss": 0.5949, "step": 11457 }, { "epoch": 0.73, "grad_norm": 1.3404817279169472, "learning_rate": 1.7509078666204076e-06, "loss": 0.6456, "step": 11458 }, { "epoch": 0.73, "grad_norm": 1.552604971679825, "learning_rate": 1.7501200614569847e-06, "loss": 0.6158, "step": 11459 }, { "epoch": 0.73, "grad_norm": 1.5181165534149332, "learning_rate": 1.7493323959665703e-06, "loss": 0.6866, "step": 11460 }, { "epoch": 0.73, "grad_norm": 1.5266170334917595, "learning_rate": 1.7485448701830205e-06, "loss": 0.7107, "step": 11461 }, { "epoch": 0.73, "grad_norm": 1.442417821045499, "learning_rate": 1.74775748414018e-06, "loss": 0.6032, "step": 11462 }, { "epoch": 0.73, "grad_norm": 1.624142491181866, "learning_rate": 1.7469702378718894e-06, "loss": 0.6909, "step": 11463 }, { "epoch": 0.73, "grad_norm": 1.9397901406105957, "learning_rate": 1.746183131411982e-06, "loss": 0.6343, "step": 11464 }, { "epoch": 0.73, "grad_norm": 1.57228826742618, "learning_rate": 1.7453961647942868e-06, "loss": 0.6799, "step": 11465 }, { "epoch": 0.73, "grad_norm": 1.556579005390565, "learning_rate": 1.7446093380526253e-06, "loss": 0.5617, "step": 11466 }, { "epoch": 0.73, "grad_norm": 1.503507376458844, "learning_rate": 1.743822651220814e-06, "loss": 0.6389, "step": 11467 }, { "epoch": 0.73, "grad_norm": 1.6657803100981838, "learning_rate": 1.743036104332661e-06, "loss": 0.6881, "step": 11468 }, { "epoch": 0.73, "grad_norm": 1.8069862182692196, "learning_rate": 1.7422496974219761e-06, "loss": 0.716, "step": 11469 }, { "epoch": 0.73, "grad_norm": 1.2576291658492722, "learning_rate": 1.7414634305225504e-06, "loss": 0.6457, "step": 11470 }, { "epoch": 0.73, "grad_norm": 1.2699438468835862, "learning_rate": 1.7406773036681807e-06, "loss": 0.7435, "step": 11471 }, { "epoch": 0.73, "grad_norm": 1.5241820764025924, "learning_rate": 1.7398913168926523e-06, "loss": 0.6055, "step": 11472 }, { "epoch": 0.73, "grad_norm": 1.5052669345110627, "learning_rate": 1.7391054702297439e-06, "loss": 0.6956, "step": 11473 }, { "epoch": 0.73, "grad_norm": 1.5099952884938368, "learning_rate": 1.738319763713231e-06, "loss": 0.6986, "step": 11474 }, { "epoch": 0.73, "grad_norm": 1.5000187853260158, "learning_rate": 1.737534197376879e-06, "loss": 0.6353, "step": 11475 }, { "epoch": 0.73, "grad_norm": 1.7067313847659173, "learning_rate": 1.736748771254455e-06, "loss": 0.6513, "step": 11476 }, { "epoch": 0.73, "grad_norm": 1.5479425799855564, "learning_rate": 1.7359634853797081e-06, "loss": 0.6668, "step": 11477 }, { "epoch": 0.73, "grad_norm": 1.4977754103679413, "learning_rate": 1.7351783397863958e-06, "loss": 0.6484, "step": 11478 }, { "epoch": 0.73, "grad_norm": 1.6840203165088936, "learning_rate": 1.7343933345082547e-06, "loss": 0.6574, "step": 11479 }, { "epoch": 0.73, "grad_norm": 1.5511501897197262, "learning_rate": 1.7336084695790278e-06, "loss": 0.6041, "step": 11480 }, { "epoch": 0.73, "grad_norm": 1.465423376700319, "learning_rate": 1.7328237450324454e-06, "loss": 0.5986, "step": 11481 }, { "epoch": 0.73, "grad_norm": 1.5292466099666577, "learning_rate": 1.7320391609022308e-06, "loss": 0.7893, "step": 11482 }, { "epoch": 0.73, "grad_norm": 1.4663830693403506, "learning_rate": 1.73125471722211e-06, "loss": 0.6993, "step": 11483 }, { "epoch": 0.74, "grad_norm": 1.5462630524641163, "learning_rate": 1.7304704140257894e-06, "loss": 0.7369, "step": 11484 }, { "epoch": 0.74, "grad_norm": 1.686136219888309, "learning_rate": 1.7296862513469836e-06, "loss": 0.7648, "step": 11485 }, { "epoch": 0.74, "grad_norm": 1.7895688496094788, "learning_rate": 1.7289022292193875e-06, "loss": 0.7239, "step": 11486 }, { "epoch": 0.74, "grad_norm": 1.6278138683424412, "learning_rate": 1.7281183476767016e-06, "loss": 0.6463, "step": 11487 }, { "epoch": 0.74, "grad_norm": 0.9916602574828061, "learning_rate": 1.7273346067526143e-06, "loss": 0.6458, "step": 11488 }, { "epoch": 0.74, "grad_norm": 1.694735489884516, "learning_rate": 1.7265510064808084e-06, "loss": 0.6392, "step": 11489 }, { "epoch": 0.74, "grad_norm": 1.4093584074279404, "learning_rate": 1.725767546894962e-06, "loss": 0.6112, "step": 11490 }, { "epoch": 0.74, "grad_norm": 1.56147647137136, "learning_rate": 1.7249842280287442e-06, "loss": 0.6541, "step": 11491 }, { "epoch": 0.74, "grad_norm": 1.6166547685215458, "learning_rate": 1.7242010499158263e-06, "loss": 0.6587, "step": 11492 }, { "epoch": 0.74, "grad_norm": 1.3721249646071927, "learning_rate": 1.7234180125898608e-06, "loss": 0.597, "step": 11493 }, { "epoch": 0.74, "grad_norm": 0.952834670937845, "learning_rate": 1.7226351160845067e-06, "loss": 0.6866, "step": 11494 }, { "epoch": 0.74, "grad_norm": 1.4864788981087185, "learning_rate": 1.721852360433406e-06, "loss": 0.6826, "step": 11495 }, { "epoch": 0.74, "grad_norm": 1.6509973801071978, "learning_rate": 1.7210697456702036e-06, "loss": 0.6234, "step": 11496 }, { "epoch": 0.74, "grad_norm": 1.62411098639562, "learning_rate": 1.7202872718285341e-06, "loss": 0.6878, "step": 11497 }, { "epoch": 0.74, "grad_norm": 1.9594447906240178, "learning_rate": 1.7195049389420238e-06, "loss": 0.7457, "step": 11498 }, { "epoch": 0.74, "grad_norm": 1.5605792465102246, "learning_rate": 1.7187227470443013e-06, "loss": 0.6289, "step": 11499 }, { "epoch": 0.74, "grad_norm": 1.6109202857033682, "learning_rate": 1.717940696168977e-06, "loss": 0.7498, "step": 11500 }, { "epoch": 0.74, "grad_norm": 1.5485232446486819, "learning_rate": 1.7171587863496686e-06, "loss": 0.7123, "step": 11501 }, { "epoch": 0.74, "grad_norm": 1.4816022416029468, "learning_rate": 1.716377017619974e-06, "loss": 0.6291, "step": 11502 }, { "epoch": 0.74, "grad_norm": 1.9546938536362752, "learning_rate": 1.715595390013497e-06, "loss": 0.7205, "step": 11503 }, { "epoch": 0.74, "grad_norm": 1.6791855781322973, "learning_rate": 1.7148139035638283e-06, "loss": 0.7286, "step": 11504 }, { "epoch": 0.74, "grad_norm": 1.0983723146167383, "learning_rate": 1.7140325583045553e-06, "loss": 0.634, "step": 11505 }, { "epoch": 0.74, "grad_norm": 1.209208063469758, "learning_rate": 1.7132513542692581e-06, "loss": 0.5974, "step": 11506 }, { "epoch": 0.74, "grad_norm": 1.5296569215378912, "learning_rate": 1.7124702914915097e-06, "loss": 0.6479, "step": 11507 }, { "epoch": 0.74, "grad_norm": 1.5183671878078953, "learning_rate": 1.7116893700048832e-06, "loss": 0.6637, "step": 11508 }, { "epoch": 0.74, "grad_norm": 1.8099368432199678, "learning_rate": 1.7109085898429345e-06, "loss": 0.7911, "step": 11509 }, { "epoch": 0.74, "grad_norm": 1.391379185784755, "learning_rate": 1.7101279510392255e-06, "loss": 0.5873, "step": 11510 }, { "epoch": 0.74, "grad_norm": 2.206800917720188, "learning_rate": 1.7093474536273037e-06, "loss": 0.7399, "step": 11511 }, { "epoch": 0.74, "grad_norm": 1.5424315935203927, "learning_rate": 1.7085670976407137e-06, "loss": 0.5661, "step": 11512 }, { "epoch": 0.74, "grad_norm": 1.5361330770809056, "learning_rate": 1.7077868831129935e-06, "loss": 0.6651, "step": 11513 }, { "epoch": 0.74, "grad_norm": 1.7678027212207819, "learning_rate": 1.7070068100776755e-06, "loss": 0.6719, "step": 11514 }, { "epoch": 0.74, "grad_norm": 1.7525858369970762, "learning_rate": 1.7062268785682852e-06, "loss": 0.6599, "step": 11515 }, { "epoch": 0.74, "grad_norm": 2.517685314763794, "learning_rate": 1.7054470886183405e-06, "loss": 0.6599, "step": 11516 }, { "epoch": 0.74, "grad_norm": 1.5419376831986897, "learning_rate": 1.7046674402613594e-06, "loss": 0.7157, "step": 11517 }, { "epoch": 0.74, "grad_norm": 1.6650408505321042, "learning_rate": 1.7038879335308466e-06, "loss": 0.6132, "step": 11518 }, { "epoch": 0.74, "grad_norm": 1.7203118631024463, "learning_rate": 1.703108568460305e-06, "loss": 0.6297, "step": 11519 }, { "epoch": 0.74, "grad_norm": 1.65269596151815, "learning_rate": 1.702329345083229e-06, "loss": 0.6427, "step": 11520 }, { "epoch": 0.74, "grad_norm": 1.5013426893389124, "learning_rate": 1.7015502634331083e-06, "loss": 0.6642, "step": 11521 }, { "epoch": 0.74, "grad_norm": 1.410056091953206, "learning_rate": 1.7007713235434264e-06, "loss": 0.6181, "step": 11522 }, { "epoch": 0.74, "grad_norm": 1.689113002095047, "learning_rate": 1.6999925254476606e-06, "loss": 0.7179, "step": 11523 }, { "epoch": 0.74, "grad_norm": 1.2450773487116413, "learning_rate": 1.6992138691792798e-06, "loss": 0.6088, "step": 11524 }, { "epoch": 0.74, "grad_norm": 1.5100244969796688, "learning_rate": 1.698435354771753e-06, "loss": 0.7202, "step": 11525 }, { "epoch": 0.74, "grad_norm": 1.535276656479583, "learning_rate": 1.6976569822585366e-06, "loss": 0.6637, "step": 11526 }, { "epoch": 0.74, "grad_norm": 1.585543885622654, "learning_rate": 1.6968787516730845e-06, "loss": 0.6166, "step": 11527 }, { "epoch": 0.74, "grad_norm": 1.3776312673120918, "learning_rate": 1.6961006630488424e-06, "loss": 0.5638, "step": 11528 }, { "epoch": 0.74, "grad_norm": 1.9582865471069877, "learning_rate": 1.6953227164192516e-06, "loss": 0.5892, "step": 11529 }, { "epoch": 0.74, "grad_norm": 1.6072525196456389, "learning_rate": 1.694544911817746e-06, "loss": 0.625, "step": 11530 }, { "epoch": 0.74, "grad_norm": 1.5629561164719428, "learning_rate": 1.6937672492777547e-06, "loss": 0.6429, "step": 11531 }, { "epoch": 0.74, "grad_norm": 1.631719688764812, "learning_rate": 1.6929897288326996e-06, "loss": 0.7253, "step": 11532 }, { "epoch": 0.74, "grad_norm": 1.0778721746225806, "learning_rate": 1.6922123505159955e-06, "loss": 0.7453, "step": 11533 }, { "epoch": 0.74, "grad_norm": 1.4361014593458217, "learning_rate": 1.6914351143610553e-06, "loss": 0.676, "step": 11534 }, { "epoch": 0.74, "grad_norm": 1.67953217280819, "learning_rate": 1.6906580204012818e-06, "loss": 0.6562, "step": 11535 }, { "epoch": 0.74, "grad_norm": 1.6421035871006264, "learning_rate": 1.6898810686700728e-06, "loss": 0.6723, "step": 11536 }, { "epoch": 0.74, "grad_norm": 1.6604593381767176, "learning_rate": 1.68910425920082e-06, "loss": 0.6599, "step": 11537 }, { "epoch": 0.74, "grad_norm": 1.5809099134486324, "learning_rate": 1.6883275920269087e-06, "loss": 0.634, "step": 11538 }, { "epoch": 0.74, "grad_norm": 1.2261739713356687, "learning_rate": 1.6875510671817186e-06, "loss": 0.728, "step": 11539 }, { "epoch": 0.74, "grad_norm": 1.702631494815809, "learning_rate": 1.6867746846986215e-06, "loss": 0.6717, "step": 11540 }, { "epoch": 0.74, "grad_norm": 1.4105753828921093, "learning_rate": 1.6859984446109906e-06, "loss": 0.6188, "step": 11541 }, { "epoch": 0.74, "grad_norm": 1.5098221718983458, "learning_rate": 1.6852223469521789e-06, "loss": 0.5898, "step": 11542 }, { "epoch": 0.74, "grad_norm": 1.5538435011903373, "learning_rate": 1.6844463917555487e-06, "loss": 0.6629, "step": 11543 }, { "epoch": 0.74, "grad_norm": 1.7150020865955942, "learning_rate": 1.6836705790544422e-06, "loss": 0.7215, "step": 11544 }, { "epoch": 0.74, "grad_norm": 1.4571453573350364, "learning_rate": 1.6828949088822077e-06, "loss": 0.6628, "step": 11545 }, { "epoch": 0.74, "grad_norm": 1.449402976565628, "learning_rate": 1.6821193812721797e-06, "loss": 0.6831, "step": 11546 }, { "epoch": 0.74, "grad_norm": 1.6171502687127672, "learning_rate": 1.6813439962576872e-06, "loss": 0.6898, "step": 11547 }, { "epoch": 0.74, "grad_norm": 1.572102777278318, "learning_rate": 1.6805687538720595e-06, "loss": 0.6445, "step": 11548 }, { "epoch": 0.74, "grad_norm": 1.4927721902135158, "learning_rate": 1.6797936541486082e-06, "loss": 0.6264, "step": 11549 }, { "epoch": 0.74, "grad_norm": 1.5242827235295546, "learning_rate": 1.6790186971206523e-06, "loss": 0.6088, "step": 11550 }, { "epoch": 0.74, "grad_norm": 1.5275114460737764, "learning_rate": 1.6782438828214913e-06, "loss": 0.6121, "step": 11551 }, { "epoch": 0.74, "grad_norm": 1.8182492393426768, "learning_rate": 1.6774692112844303e-06, "loss": 0.7101, "step": 11552 }, { "epoch": 0.74, "grad_norm": 1.5326089479899687, "learning_rate": 1.6766946825427605e-06, "loss": 0.5682, "step": 11553 }, { "epoch": 0.74, "grad_norm": 1.4300438878670334, "learning_rate": 1.6759202966297682e-06, "loss": 0.7402, "step": 11554 }, { "epoch": 0.74, "grad_norm": 1.5267438169525258, "learning_rate": 1.6751460535787407e-06, "loss": 0.6391, "step": 11555 }, { "epoch": 0.74, "grad_norm": 1.3558296806008723, "learning_rate": 1.6743719534229452e-06, "loss": 0.6351, "step": 11556 }, { "epoch": 0.74, "grad_norm": 1.000844127476391, "learning_rate": 1.6735979961956588e-06, "loss": 0.6133, "step": 11557 }, { "epoch": 0.74, "grad_norm": 1.8888580931749424, "learning_rate": 1.6728241819301377e-06, "loss": 0.7289, "step": 11558 }, { "epoch": 0.74, "grad_norm": 1.889430926188884, "learning_rate": 1.6720505106596429e-06, "loss": 0.6064, "step": 11559 }, { "epoch": 0.74, "grad_norm": 1.805637476536109, "learning_rate": 1.6712769824174245e-06, "loss": 0.8042, "step": 11560 }, { "epoch": 0.74, "grad_norm": 1.5720753939518326, "learning_rate": 1.6705035972367272e-06, "loss": 0.6307, "step": 11561 }, { "epoch": 0.74, "grad_norm": 1.5302347950292217, "learning_rate": 1.6697303551507888e-06, "loss": 0.6997, "step": 11562 }, { "epoch": 0.74, "grad_norm": 1.1092059777356773, "learning_rate": 1.6689572561928397e-06, "loss": 0.603, "step": 11563 }, { "epoch": 0.74, "grad_norm": 1.6407717250997258, "learning_rate": 1.6681843003961124e-06, "loss": 0.6144, "step": 11564 }, { "epoch": 0.74, "grad_norm": 1.5788149353753074, "learning_rate": 1.6674114877938185e-06, "loss": 0.6865, "step": 11565 }, { "epoch": 0.74, "grad_norm": 1.5144973905547934, "learning_rate": 1.6666388184191784e-06, "loss": 0.6979, "step": 11566 }, { "epoch": 0.74, "grad_norm": 1.7354697807761055, "learning_rate": 1.6658662923053974e-06, "loss": 0.7096, "step": 11567 }, { "epoch": 0.74, "grad_norm": 1.4139229258788515, "learning_rate": 1.6650939094856772e-06, "loss": 0.767, "step": 11568 }, { "epoch": 0.74, "grad_norm": 1.6115048501524059, "learning_rate": 1.664321669993213e-06, "loss": 0.6626, "step": 11569 }, { "epoch": 0.74, "grad_norm": 1.7075272327717215, "learning_rate": 1.6635495738611945e-06, "loss": 0.6692, "step": 11570 }, { "epoch": 0.74, "grad_norm": 1.5735475132088423, "learning_rate": 1.6627776211228041e-06, "loss": 0.6049, "step": 11571 }, { "epoch": 0.74, "grad_norm": 1.6971220457273, "learning_rate": 1.6620058118112182e-06, "loss": 0.6392, "step": 11572 }, { "epoch": 0.74, "grad_norm": 1.5674673827652015, "learning_rate": 1.661234145959611e-06, "loss": 0.5978, "step": 11573 }, { "epoch": 0.74, "grad_norm": 1.578263928021032, "learning_rate": 1.6604626236011413e-06, "loss": 0.7777, "step": 11574 }, { "epoch": 0.74, "grad_norm": 1.5082782765535627, "learning_rate": 1.6596912447689723e-06, "loss": 0.6555, "step": 11575 }, { "epoch": 0.74, "grad_norm": 1.6068232914937197, "learning_rate": 1.658920009496255e-06, "loss": 0.6903, "step": 11576 }, { "epoch": 0.74, "grad_norm": 1.8288170099323355, "learning_rate": 1.6581489178161348e-06, "loss": 0.6491, "step": 11577 }, { "epoch": 0.74, "grad_norm": 1.6477760965901274, "learning_rate": 1.657377969761752e-06, "loss": 0.6303, "step": 11578 }, { "epoch": 0.74, "grad_norm": 1.6322895859336304, "learning_rate": 1.6566071653662403e-06, "loss": 0.6207, "step": 11579 }, { "epoch": 0.74, "grad_norm": 1.656904627963353, "learning_rate": 1.6558365046627268e-06, "loss": 0.7079, "step": 11580 }, { "epoch": 0.74, "grad_norm": 1.4420908765220557, "learning_rate": 1.6550659876843317e-06, "loss": 0.6445, "step": 11581 }, { "epoch": 0.74, "grad_norm": 1.9291513964542832, "learning_rate": 1.6542956144641742e-06, "loss": 0.5714, "step": 11582 }, { "epoch": 0.74, "grad_norm": 1.545620947639161, "learning_rate": 1.65352538503536e-06, "loss": 0.7043, "step": 11583 }, { "epoch": 0.74, "grad_norm": 1.6685039319138222, "learning_rate": 1.652755299430993e-06, "loss": 0.7205, "step": 11584 }, { "epoch": 0.74, "grad_norm": 1.4722670141112737, "learning_rate": 1.6519853576841698e-06, "loss": 0.6294, "step": 11585 }, { "epoch": 0.74, "grad_norm": 1.626356741860351, "learning_rate": 1.651215559827981e-06, "loss": 0.6927, "step": 11586 }, { "epoch": 0.74, "grad_norm": 1.5146463413273594, "learning_rate": 1.6504459058955108e-06, "loss": 0.6747, "step": 11587 }, { "epoch": 0.74, "grad_norm": 1.4143866245839658, "learning_rate": 1.649676395919837e-06, "loss": 0.722, "step": 11588 }, { "epoch": 0.74, "grad_norm": 1.4726812733961692, "learning_rate": 1.6489070299340298e-06, "loss": 0.6318, "step": 11589 }, { "epoch": 0.74, "grad_norm": 1.8412033644137589, "learning_rate": 1.6481378079711586e-06, "loss": 0.6971, "step": 11590 }, { "epoch": 0.74, "grad_norm": 1.1590756350795985, "learning_rate": 1.6473687300642815e-06, "loss": 0.7149, "step": 11591 }, { "epoch": 0.74, "grad_norm": 1.6108168974064672, "learning_rate": 1.6465997962464514e-06, "loss": 0.7053, "step": 11592 }, { "epoch": 0.74, "grad_norm": 1.065740233119982, "learning_rate": 1.6458310065507154e-06, "loss": 0.6795, "step": 11593 }, { "epoch": 0.74, "grad_norm": 1.5444264237704695, "learning_rate": 1.6450623610101147e-06, "loss": 0.5773, "step": 11594 }, { "epoch": 0.74, "grad_norm": 1.4796278495660986, "learning_rate": 1.6442938596576842e-06, "loss": 0.6138, "step": 11595 }, { "epoch": 0.74, "grad_norm": 1.8382498980774287, "learning_rate": 1.6435255025264502e-06, "loss": 0.6122, "step": 11596 }, { "epoch": 0.74, "grad_norm": 1.4909922192051837, "learning_rate": 1.6427572896494408e-06, "loss": 0.7049, "step": 11597 }, { "epoch": 0.74, "grad_norm": 1.4636317145451843, "learning_rate": 1.6419892210596655e-06, "loss": 0.7384, "step": 11598 }, { "epoch": 0.74, "grad_norm": 1.7118461356629402, "learning_rate": 1.6412212967901386e-06, "loss": 0.643, "step": 11599 }, { "epoch": 0.74, "grad_norm": 1.196507688612977, "learning_rate": 1.6404535168738629e-06, "loss": 0.6909, "step": 11600 }, { "epoch": 0.74, "grad_norm": 1.26205927390231, "learning_rate": 1.6396858813438355e-06, "loss": 0.5961, "step": 11601 }, { "epoch": 0.74, "grad_norm": 1.6624648341476806, "learning_rate": 1.6389183902330475e-06, "loss": 0.6801, "step": 11602 }, { "epoch": 0.74, "grad_norm": 1.4873579673331836, "learning_rate": 1.638151043574485e-06, "loss": 0.5529, "step": 11603 }, { "epoch": 0.74, "grad_norm": 2.08111555851593, "learning_rate": 1.6373838414011261e-06, "loss": 0.604, "step": 11604 }, { "epoch": 0.74, "grad_norm": 1.5127374817935195, "learning_rate": 1.6366167837459429e-06, "loss": 0.6607, "step": 11605 }, { "epoch": 0.74, "grad_norm": 1.5769577290088808, "learning_rate": 1.6358498706419056e-06, "loss": 0.6102, "step": 11606 }, { "epoch": 0.74, "grad_norm": 1.6849951345887708, "learning_rate": 1.6350831021219686e-06, "loss": 0.7567, "step": 11607 }, { "epoch": 0.74, "grad_norm": 1.58697798074393, "learning_rate": 1.6343164782190906e-06, "loss": 0.6949, "step": 11608 }, { "epoch": 0.74, "grad_norm": 0.9382939846207076, "learning_rate": 1.6335499989662185e-06, "loss": 0.5113, "step": 11609 }, { "epoch": 0.74, "grad_norm": 1.4283056265345324, "learning_rate": 1.6327836643962936e-06, "loss": 0.5744, "step": 11610 }, { "epoch": 0.74, "grad_norm": 1.526158124375945, "learning_rate": 1.6320174745422513e-06, "loss": 0.6466, "step": 11611 }, { "epoch": 0.74, "grad_norm": 1.7393775453911569, "learning_rate": 1.631251429437019e-06, "loss": 0.6852, "step": 11612 }, { "epoch": 0.74, "grad_norm": 1.6496789776278373, "learning_rate": 1.6304855291135247e-06, "loss": 0.7099, "step": 11613 }, { "epoch": 0.74, "grad_norm": 1.4905345054889483, "learning_rate": 1.629719773604679e-06, "loss": 0.653, "step": 11614 }, { "epoch": 0.74, "grad_norm": 1.5696681919757485, "learning_rate": 1.6289541629433986e-06, "loss": 0.6602, "step": 11615 }, { "epoch": 0.74, "grad_norm": 1.6495958863585622, "learning_rate": 1.6281886971625815e-06, "loss": 0.7255, "step": 11616 }, { "epoch": 0.74, "grad_norm": 1.0412917525017817, "learning_rate": 1.6274233762951308e-06, "loss": 0.6905, "step": 11617 }, { "epoch": 0.74, "grad_norm": 1.764757538875048, "learning_rate": 1.6266582003739367e-06, "loss": 0.7531, "step": 11618 }, { "epoch": 0.74, "grad_norm": 1.389331518573762, "learning_rate": 1.6258931694318831e-06, "loss": 0.6131, "step": 11619 }, { "epoch": 0.74, "grad_norm": 1.1190363174604132, "learning_rate": 1.625128283501854e-06, "loss": 0.5882, "step": 11620 }, { "epoch": 0.74, "grad_norm": 1.6576191083783185, "learning_rate": 1.624363542616717e-06, "loss": 0.6694, "step": 11621 }, { "epoch": 0.74, "grad_norm": 1.6341659477280917, "learning_rate": 1.6235989468093444e-06, "loss": 0.6203, "step": 11622 }, { "epoch": 0.74, "grad_norm": 1.537274465351559, "learning_rate": 1.6228344961125914e-06, "loss": 0.6735, "step": 11623 }, { "epoch": 0.74, "grad_norm": 2.8374014267685594, "learning_rate": 1.6220701905593168e-06, "loss": 0.6712, "step": 11624 }, { "epoch": 0.74, "grad_norm": 1.5209939993114372, "learning_rate": 1.6213060301823674e-06, "loss": 0.7785, "step": 11625 }, { "epoch": 0.74, "grad_norm": 1.2925198119857413, "learning_rate": 1.6205420150145857e-06, "loss": 0.6802, "step": 11626 }, { "epoch": 0.74, "grad_norm": 1.6503649030152359, "learning_rate": 1.6197781450888067e-06, "loss": 0.7371, "step": 11627 }, { "epoch": 0.74, "grad_norm": 1.5359691821939236, "learning_rate": 1.6190144204378583e-06, "loss": 0.6413, "step": 11628 }, { "epoch": 0.74, "grad_norm": 1.707475725983066, "learning_rate": 1.618250841094569e-06, "loss": 0.6896, "step": 11629 }, { "epoch": 0.74, "grad_norm": 1.9732957321455897, "learning_rate": 1.6174874070917495e-06, "loss": 0.6551, "step": 11630 }, { "epoch": 0.74, "grad_norm": 1.1567386804699433, "learning_rate": 1.616724118462215e-06, "loss": 0.6262, "step": 11631 }, { "epoch": 0.74, "grad_norm": 1.5253522994448288, "learning_rate": 1.615960975238769e-06, "loss": 0.7163, "step": 11632 }, { "epoch": 0.74, "grad_norm": 1.4619984974143871, "learning_rate": 1.6151979774542087e-06, "loss": 0.6152, "step": 11633 }, { "epoch": 0.74, "grad_norm": 2.134736490685951, "learning_rate": 1.6144351251413277e-06, "loss": 0.6977, "step": 11634 }, { "epoch": 0.74, "grad_norm": 1.814588980929659, "learning_rate": 1.6136724183329106e-06, "loss": 0.7385, "step": 11635 }, { "epoch": 0.74, "grad_norm": 1.678611247101367, "learning_rate": 1.6129098570617374e-06, "loss": 0.6437, "step": 11636 }, { "epoch": 0.74, "grad_norm": 1.3181598482733259, "learning_rate": 1.6121474413605792e-06, "loss": 0.6522, "step": 11637 }, { "epoch": 0.74, "grad_norm": 1.5008378519559054, "learning_rate": 1.6113851712622076e-06, "loss": 0.6508, "step": 11638 }, { "epoch": 0.74, "grad_norm": 1.656694423358347, "learning_rate": 1.61062304679938e-06, "loss": 0.6351, "step": 11639 }, { "epoch": 0.75, "grad_norm": 1.623498846631093, "learning_rate": 1.6098610680048527e-06, "loss": 0.7074, "step": 11640 }, { "epoch": 0.75, "grad_norm": 1.6715721108518298, "learning_rate": 1.609099234911372e-06, "loss": 0.6826, "step": 11641 }, { "epoch": 0.75, "grad_norm": 1.6929540576997717, "learning_rate": 1.6083375475516815e-06, "loss": 0.6394, "step": 11642 }, { "epoch": 0.75, "grad_norm": 1.930377491827557, "learning_rate": 1.6075760059585166e-06, "loss": 0.6541, "step": 11643 }, { "epoch": 0.75, "grad_norm": 1.6160560482008666, "learning_rate": 1.6068146101646059e-06, "loss": 0.6468, "step": 11644 }, { "epoch": 0.75, "grad_norm": 1.6540161497855308, "learning_rate": 1.6060533602026734e-06, "loss": 0.6796, "step": 11645 }, { "epoch": 0.75, "grad_norm": 2.8627209091909656, "learning_rate": 1.6052922561054346e-06, "loss": 0.6916, "step": 11646 }, { "epoch": 0.75, "grad_norm": 1.5481438592175911, "learning_rate": 1.6045312979056027e-06, "loss": 0.6606, "step": 11647 }, { "epoch": 0.75, "grad_norm": 1.541237037939651, "learning_rate": 1.6037704856358805e-06, "loss": 0.6242, "step": 11648 }, { "epoch": 0.75, "grad_norm": 1.7512433574459982, "learning_rate": 1.6030098193289667e-06, "loss": 0.7129, "step": 11649 }, { "epoch": 0.75, "grad_norm": 1.6883250449652418, "learning_rate": 1.6022492990175525e-06, "loss": 0.7248, "step": 11650 }, { "epoch": 0.75, "grad_norm": 1.249935158994328, "learning_rate": 1.601488924734324e-06, "loss": 0.795, "step": 11651 }, { "epoch": 0.75, "grad_norm": 1.583957004704792, "learning_rate": 1.6007286965119602e-06, "loss": 0.6462, "step": 11652 }, { "epoch": 0.75, "grad_norm": 3.233420178997326, "learning_rate": 1.5999686143831344e-06, "loss": 0.6859, "step": 11653 }, { "epoch": 0.75, "grad_norm": 1.4859786799685597, "learning_rate": 1.5992086783805111e-06, "loss": 0.5943, "step": 11654 }, { "epoch": 0.75, "grad_norm": 1.6085800926822988, "learning_rate": 1.5984488885367543e-06, "loss": 0.7053, "step": 11655 }, { "epoch": 0.75, "grad_norm": 1.7375568589178454, "learning_rate": 1.5976892448845172e-06, "loss": 0.7505, "step": 11656 }, { "epoch": 0.75, "grad_norm": 1.6794552179479747, "learning_rate": 1.5969297474564465e-06, "loss": 0.6783, "step": 11657 }, { "epoch": 0.75, "grad_norm": 1.0656538784387186, "learning_rate": 1.5961703962851848e-06, "loss": 0.7244, "step": 11658 }, { "epoch": 0.75, "grad_norm": 1.5657582966680343, "learning_rate": 1.595411191403367e-06, "loss": 0.6702, "step": 11659 }, { "epoch": 0.75, "grad_norm": 1.4483465673875866, "learning_rate": 1.5946521328436221e-06, "loss": 0.5773, "step": 11660 }, { "epoch": 0.75, "grad_norm": 1.5001083054596869, "learning_rate": 1.5938932206385716e-06, "loss": 0.6354, "step": 11661 }, { "epoch": 0.75, "grad_norm": 1.5388906854299884, "learning_rate": 1.593134454820836e-06, "loss": 0.6278, "step": 11662 }, { "epoch": 0.75, "grad_norm": 1.6269965108777051, "learning_rate": 1.5923758354230196e-06, "loss": 0.5988, "step": 11663 }, { "epoch": 0.75, "grad_norm": 1.4352926823290182, "learning_rate": 1.5916173624777308e-06, "loss": 0.7425, "step": 11664 }, { "epoch": 0.75, "grad_norm": 1.5269659775997657, "learning_rate": 1.5908590360175663e-06, "loss": 0.6862, "step": 11665 }, { "epoch": 0.75, "grad_norm": 1.8413199604437365, "learning_rate": 1.590100856075117e-06, "loss": 0.6424, "step": 11666 }, { "epoch": 0.75, "grad_norm": 1.616343434761012, "learning_rate": 1.5893428226829672e-06, "loss": 0.6242, "step": 11667 }, { "epoch": 0.75, "grad_norm": 1.65358338226454, "learning_rate": 1.5885849358736944e-06, "loss": 0.6686, "step": 11668 }, { "epoch": 0.75, "grad_norm": 1.6166104842064062, "learning_rate": 1.5878271956798762e-06, "loss": 0.7228, "step": 11669 }, { "epoch": 0.75, "grad_norm": 1.7105027638651464, "learning_rate": 1.5870696021340714e-06, "loss": 0.756, "step": 11670 }, { "epoch": 0.75, "grad_norm": 1.868811923051277, "learning_rate": 1.5863121552688477e-06, "loss": 0.7444, "step": 11671 }, { "epoch": 0.75, "grad_norm": 1.3569784733445822, "learning_rate": 1.5855548551167505e-06, "loss": 0.6828, "step": 11672 }, { "epoch": 0.75, "grad_norm": 1.502936501552538, "learning_rate": 1.5847977017103327e-06, "loss": 0.6909, "step": 11673 }, { "epoch": 0.75, "grad_norm": 1.5939708574622422, "learning_rate": 1.5840406950821335e-06, "loss": 0.6105, "step": 11674 }, { "epoch": 0.75, "grad_norm": 1.770594442317308, "learning_rate": 1.5832838352646856e-06, "loss": 0.6729, "step": 11675 }, { "epoch": 0.75, "grad_norm": 1.6129581692319728, "learning_rate": 1.582527122290522e-06, "loss": 0.6796, "step": 11676 }, { "epoch": 0.75, "grad_norm": 1.6930405030696705, "learning_rate": 1.5817705561921587e-06, "loss": 0.6268, "step": 11677 }, { "epoch": 0.75, "grad_norm": 1.717425413277279, "learning_rate": 1.581014137002117e-06, "loss": 0.6716, "step": 11678 }, { "epoch": 0.75, "grad_norm": 1.5571509049463543, "learning_rate": 1.5802578647529005e-06, "loss": 0.6726, "step": 11679 }, { "epoch": 0.75, "grad_norm": 1.6943507309363324, "learning_rate": 1.5795017394770167e-06, "loss": 0.6316, "step": 11680 }, { "epoch": 0.75, "grad_norm": 1.5974839195979658, "learning_rate": 1.5787457612069607e-06, "loss": 0.6589, "step": 11681 }, { "epoch": 0.75, "grad_norm": 1.8266377590921439, "learning_rate": 1.5779899299752227e-06, "loss": 0.7538, "step": 11682 }, { "epoch": 0.75, "grad_norm": 1.640717690423354, "learning_rate": 1.577234245814287e-06, "loss": 0.7082, "step": 11683 }, { "epoch": 0.75, "grad_norm": 1.3888080479140836, "learning_rate": 1.576478708756629e-06, "loss": 0.547, "step": 11684 }, { "epoch": 0.75, "grad_norm": 1.585105730470735, "learning_rate": 1.5757233188347265e-06, "loss": 0.656, "step": 11685 }, { "epoch": 0.75, "grad_norm": 1.5687390301674835, "learning_rate": 1.5749680760810365e-06, "loss": 0.6547, "step": 11686 }, { "epoch": 0.75, "grad_norm": 1.5539460550826685, "learning_rate": 1.5742129805280249e-06, "loss": 0.6208, "step": 11687 }, { "epoch": 0.75, "grad_norm": 1.729632261952158, "learning_rate": 1.5734580322081377e-06, "loss": 0.6844, "step": 11688 }, { "epoch": 0.75, "grad_norm": 1.4675674049218705, "learning_rate": 1.572703231153826e-06, "loss": 0.7016, "step": 11689 }, { "epoch": 0.75, "grad_norm": 1.4657005018811171, "learning_rate": 1.5719485773975275e-06, "loss": 0.643, "step": 11690 }, { "epoch": 0.75, "grad_norm": 1.2265717132702507, "learning_rate": 1.5711940709716755e-06, "loss": 0.634, "step": 11691 }, { "epoch": 0.75, "grad_norm": 1.4624691568265096, "learning_rate": 1.5704397119086974e-06, "loss": 0.6144, "step": 11692 }, { "epoch": 0.75, "grad_norm": 2.0732081944795695, "learning_rate": 1.5696855002410127e-06, "loss": 0.7087, "step": 11693 }, { "epoch": 0.75, "grad_norm": 1.4525464453644659, "learning_rate": 1.5689314360010404e-06, "loss": 0.6046, "step": 11694 }, { "epoch": 0.75, "grad_norm": 1.6817011580080032, "learning_rate": 1.5681775192211819e-06, "loss": 0.6737, "step": 11695 }, { "epoch": 0.75, "grad_norm": 1.6780817232738345, "learning_rate": 1.567423749933844e-06, "loss": 0.7007, "step": 11696 }, { "epoch": 0.75, "grad_norm": 1.0975939439604734, "learning_rate": 1.5666701281714202e-06, "loss": 0.6557, "step": 11697 }, { "epoch": 0.75, "grad_norm": 1.4547527309497648, "learning_rate": 1.5659166539663001e-06, "loss": 0.6792, "step": 11698 }, { "epoch": 0.75, "grad_norm": 1.4812493548815062, "learning_rate": 1.5651633273508666e-06, "loss": 0.7324, "step": 11699 }, { "epoch": 0.75, "grad_norm": 1.6596610962933036, "learning_rate": 1.5644101483574953e-06, "loss": 0.6673, "step": 11700 }, { "epoch": 0.75, "grad_norm": 1.5300475662860373, "learning_rate": 1.5636571170185565e-06, "loss": 0.6103, "step": 11701 }, { "epoch": 0.75, "grad_norm": 1.4885045401621626, "learning_rate": 1.562904233366413e-06, "loss": 0.6675, "step": 11702 }, { "epoch": 0.75, "grad_norm": 1.6166714746393065, "learning_rate": 1.5621514974334246e-06, "loss": 0.6684, "step": 11703 }, { "epoch": 0.75, "grad_norm": 1.6472224446747603, "learning_rate": 1.5613989092519406e-06, "loss": 0.6695, "step": 11704 }, { "epoch": 0.75, "grad_norm": 1.7863551833760396, "learning_rate": 1.5606464688543066e-06, "loss": 0.6385, "step": 11705 }, { "epoch": 0.75, "grad_norm": 1.605739418705551, "learning_rate": 1.5598941762728597e-06, "loss": 0.6356, "step": 11706 }, { "epoch": 0.75, "grad_norm": 1.5099855008474603, "learning_rate": 1.5591420315399324e-06, "loss": 0.6661, "step": 11707 }, { "epoch": 0.75, "grad_norm": 1.4740369610846422, "learning_rate": 1.5583900346878505e-06, "loss": 0.5736, "step": 11708 }, { "epoch": 0.75, "grad_norm": 1.912408166034611, "learning_rate": 1.5576381857489337e-06, "loss": 0.6353, "step": 11709 }, { "epoch": 0.75, "grad_norm": 1.4277420196939687, "learning_rate": 1.5568864847554922e-06, "loss": 0.6749, "step": 11710 }, { "epoch": 0.75, "grad_norm": 1.5235433214019474, "learning_rate": 1.556134931739836e-06, "loss": 0.7085, "step": 11711 }, { "epoch": 0.75, "grad_norm": 1.4578754639141704, "learning_rate": 1.5553835267342642e-06, "loss": 0.6039, "step": 11712 }, { "epoch": 0.75, "grad_norm": 1.4507037456226388, "learning_rate": 1.5546322697710697e-06, "loss": 0.6305, "step": 11713 }, { "epoch": 0.75, "grad_norm": 1.6271867401840145, "learning_rate": 1.5538811608825411e-06, "loss": 0.6745, "step": 11714 }, { "epoch": 0.75, "grad_norm": 1.770417744874038, "learning_rate": 1.553130200100959e-06, "loss": 0.7178, "step": 11715 }, { "epoch": 0.75, "grad_norm": 1.5354511499566332, "learning_rate": 1.5523793874585979e-06, "loss": 0.7205, "step": 11716 }, { "epoch": 0.75, "grad_norm": 1.960574187904981, "learning_rate": 1.5516287229877242e-06, "loss": 0.602, "step": 11717 }, { "epoch": 0.75, "grad_norm": 1.5369883735958376, "learning_rate": 1.5508782067206056e-06, "loss": 0.6846, "step": 11718 }, { "epoch": 0.75, "grad_norm": 1.6123452501528195, "learning_rate": 1.5501278386894907e-06, "loss": 0.7113, "step": 11719 }, { "epoch": 0.75, "grad_norm": 1.1773936916004888, "learning_rate": 1.549377618926634e-06, "loss": 0.6228, "step": 11720 }, { "epoch": 0.75, "grad_norm": 1.741462425444859, "learning_rate": 1.5486275474642765e-06, "loss": 0.6245, "step": 11721 }, { "epoch": 0.75, "grad_norm": 1.5503831034331435, "learning_rate": 1.5478776243346544e-06, "loss": 0.6481, "step": 11722 }, { "epoch": 0.75, "grad_norm": 1.368316499552045, "learning_rate": 1.5471278495699982e-06, "loss": 0.6766, "step": 11723 }, { "epoch": 0.75, "grad_norm": 1.5315119091338547, "learning_rate": 1.5463782232025315e-06, "loss": 0.7627, "step": 11724 }, { "epoch": 0.75, "grad_norm": 1.6111195478019806, "learning_rate": 1.545628745264472e-06, "loss": 0.6675, "step": 11725 }, { "epoch": 0.75, "grad_norm": 1.5262292684643015, "learning_rate": 1.5448794157880282e-06, "loss": 0.6361, "step": 11726 }, { "epoch": 0.75, "grad_norm": 1.4512619571598213, "learning_rate": 1.5441302348054105e-06, "loss": 0.6683, "step": 11727 }, { "epoch": 0.75, "grad_norm": 1.3589898682759964, "learning_rate": 1.5433812023488104e-06, "loss": 0.5617, "step": 11728 }, { "epoch": 0.75, "grad_norm": 2.19343711722184, "learning_rate": 1.5426323184504244e-06, "loss": 0.628, "step": 11729 }, { "epoch": 0.75, "grad_norm": 1.0822559858397307, "learning_rate": 1.541883583142436e-06, "loss": 0.6639, "step": 11730 }, { "epoch": 0.75, "grad_norm": 1.521638081393906, "learning_rate": 1.5411349964570254e-06, "loss": 0.7143, "step": 11731 }, { "epoch": 0.75, "grad_norm": 1.6621748091008806, "learning_rate": 1.5403865584263638e-06, "loss": 0.6382, "step": 11732 }, { "epoch": 0.75, "grad_norm": 1.5266276773098768, "learning_rate": 1.5396382690826173e-06, "loss": 0.6276, "step": 11733 }, { "epoch": 0.75, "grad_norm": 1.5910637503422385, "learning_rate": 1.53889012845795e-06, "loss": 0.7774, "step": 11734 }, { "epoch": 0.75, "grad_norm": 1.5464671167828312, "learning_rate": 1.5381421365845083e-06, "loss": 0.6456, "step": 11735 }, { "epoch": 0.75, "grad_norm": 1.4837036830405794, "learning_rate": 1.5373942934944468e-06, "loss": 0.5799, "step": 11736 }, { "epoch": 0.75, "grad_norm": 1.5355426385220552, "learning_rate": 1.5366465992198997e-06, "loss": 0.6171, "step": 11737 }, { "epoch": 0.75, "grad_norm": 1.5908720987078613, "learning_rate": 1.5358990537930058e-06, "loss": 0.6026, "step": 11738 }, { "epoch": 0.75, "grad_norm": 1.5438817615999652, "learning_rate": 1.5351516572458913e-06, "loss": 0.6939, "step": 11739 }, { "epoch": 0.75, "grad_norm": 1.5939331330515165, "learning_rate": 1.5344044096106763e-06, "loss": 0.7108, "step": 11740 }, { "epoch": 0.75, "grad_norm": 2.396575180862144, "learning_rate": 1.5336573109194807e-06, "loss": 0.6182, "step": 11741 }, { "epoch": 0.75, "grad_norm": 1.5078909538369991, "learning_rate": 1.5329103612044072e-06, "loss": 0.6226, "step": 11742 }, { "epoch": 0.75, "grad_norm": 1.4260430175471681, "learning_rate": 1.5321635604975637e-06, "loss": 0.6091, "step": 11743 }, { "epoch": 0.75, "grad_norm": 1.588155757313863, "learning_rate": 1.5314169088310405e-06, "loss": 0.6601, "step": 11744 }, { "epoch": 0.75, "grad_norm": 1.5015083839599852, "learning_rate": 1.530670406236932e-06, "loss": 0.6147, "step": 11745 }, { "epoch": 0.75, "grad_norm": 1.7064677214889277, "learning_rate": 1.5299240527473191e-06, "loss": 0.6147, "step": 11746 }, { "epoch": 0.75, "grad_norm": 2.107734147611329, "learning_rate": 1.5291778483942771e-06, "loss": 0.5764, "step": 11747 }, { "epoch": 0.75, "grad_norm": 1.4392756836890976, "learning_rate": 1.5284317932098813e-06, "loss": 0.5575, "step": 11748 }, { "epoch": 0.75, "grad_norm": 1.855652672253301, "learning_rate": 1.527685887226189e-06, "loss": 0.6827, "step": 11749 }, { "epoch": 0.75, "grad_norm": 1.477214252756313, "learning_rate": 1.526940130475264e-06, "loss": 0.6643, "step": 11750 }, { "epoch": 0.75, "grad_norm": 1.557435178733055, "learning_rate": 1.5261945229891512e-06, "loss": 0.625, "step": 11751 }, { "epoch": 0.75, "grad_norm": 1.7563098665448016, "learning_rate": 1.5254490647999e-06, "loss": 0.658, "step": 11752 }, { "epoch": 0.75, "grad_norm": 1.8672111288922952, "learning_rate": 1.5247037559395467e-06, "loss": 0.6497, "step": 11753 }, { "epoch": 0.75, "grad_norm": 1.4791747949966463, "learning_rate": 1.5239585964401237e-06, "loss": 0.7568, "step": 11754 }, { "epoch": 0.75, "grad_norm": 1.4286330230519573, "learning_rate": 1.5232135863336556e-06, "loss": 0.5652, "step": 11755 }, { "epoch": 0.75, "grad_norm": 1.6699268686799131, "learning_rate": 1.522468725652161e-06, "loss": 0.6976, "step": 11756 }, { "epoch": 0.75, "grad_norm": 1.6366224184095817, "learning_rate": 1.5217240144276558e-06, "loss": 0.66, "step": 11757 }, { "epoch": 0.75, "grad_norm": 1.7095348377258839, "learning_rate": 1.5209794526921406e-06, "loss": 0.578, "step": 11758 }, { "epoch": 0.75, "grad_norm": 1.1605028825018626, "learning_rate": 1.5202350404776196e-06, "loss": 0.6945, "step": 11759 }, { "epoch": 0.75, "grad_norm": 1.1081746364542167, "learning_rate": 1.519490777816085e-06, "loss": 0.6942, "step": 11760 }, { "epoch": 0.75, "grad_norm": 1.1243720893359201, "learning_rate": 1.5187466647395227e-06, "loss": 0.7026, "step": 11761 }, { "epoch": 0.75, "grad_norm": 1.4841327274605347, "learning_rate": 1.5180027012799138e-06, "loss": 0.6035, "step": 11762 }, { "epoch": 0.75, "grad_norm": 0.9334979706668303, "learning_rate": 1.5172588874692318e-06, "loss": 0.6332, "step": 11763 }, { "epoch": 0.75, "grad_norm": 1.4096017760422122, "learning_rate": 1.5165152233394453e-06, "loss": 0.6506, "step": 11764 }, { "epoch": 0.75, "grad_norm": 1.7048925735864884, "learning_rate": 1.5157717089225144e-06, "loss": 0.595, "step": 11765 }, { "epoch": 0.75, "grad_norm": 1.5036411435919825, "learning_rate": 1.5150283442503937e-06, "loss": 0.6053, "step": 11766 }, { "epoch": 0.75, "grad_norm": 1.5428011399753983, "learning_rate": 1.5142851293550303e-06, "loss": 0.6437, "step": 11767 }, { "epoch": 0.75, "grad_norm": 1.649762082363539, "learning_rate": 1.5135420642683696e-06, "loss": 0.6676, "step": 11768 }, { "epoch": 0.75, "grad_norm": 2.4289903561337076, "learning_rate": 1.5127991490223449e-06, "loss": 0.7267, "step": 11769 }, { "epoch": 0.75, "grad_norm": 1.5566131571519357, "learning_rate": 1.5120563836488844e-06, "loss": 0.6328, "step": 11770 }, { "epoch": 0.75, "grad_norm": 1.5668797245541055, "learning_rate": 1.5113137681799123e-06, "loss": 0.7092, "step": 11771 }, { "epoch": 0.75, "grad_norm": 1.485009730803725, "learning_rate": 1.5105713026473429e-06, "loss": 0.5617, "step": 11772 }, { "epoch": 0.75, "grad_norm": 1.6264369269830934, "learning_rate": 1.5098289870830869e-06, "loss": 0.7131, "step": 11773 }, { "epoch": 0.75, "grad_norm": 1.5212938040730979, "learning_rate": 1.5090868215190474e-06, "loss": 0.6937, "step": 11774 }, { "epoch": 0.75, "grad_norm": 1.7633935392169373, "learning_rate": 1.508344805987119e-06, "loss": 0.7172, "step": 11775 }, { "epoch": 0.75, "grad_norm": 1.6144219636078063, "learning_rate": 1.5076029405191955e-06, "loss": 0.6808, "step": 11776 }, { "epoch": 0.75, "grad_norm": 1.5884775266107283, "learning_rate": 1.5068612251471592e-06, "loss": 0.8121, "step": 11777 }, { "epoch": 0.75, "grad_norm": 1.6118111465938951, "learning_rate": 1.5061196599028877e-06, "loss": 0.7334, "step": 11778 }, { "epoch": 0.75, "grad_norm": 1.5407625773216183, "learning_rate": 1.5053782448182509e-06, "loss": 0.6986, "step": 11779 }, { "epoch": 0.75, "grad_norm": 1.6396357448761703, "learning_rate": 1.5046369799251143e-06, "loss": 0.7368, "step": 11780 }, { "epoch": 0.75, "grad_norm": 1.3985991369450437, "learning_rate": 1.5038958652553354e-06, "loss": 0.5298, "step": 11781 }, { "epoch": 0.75, "grad_norm": 1.2820003370416697, "learning_rate": 1.5031549008407642e-06, "loss": 0.7013, "step": 11782 }, { "epoch": 0.75, "grad_norm": 1.604499421317278, "learning_rate": 1.502414086713251e-06, "loss": 0.6972, "step": 11783 }, { "epoch": 0.75, "grad_norm": 1.5171914430940407, "learning_rate": 1.5016734229046277e-06, "loss": 0.6034, "step": 11784 }, { "epoch": 0.75, "grad_norm": 1.3743485388925232, "learning_rate": 1.5009329094467313e-06, "loss": 0.6646, "step": 11785 }, { "epoch": 0.75, "grad_norm": 1.5735841282698242, "learning_rate": 1.5001925463713857e-06, "loss": 0.6718, "step": 11786 }, { "epoch": 0.75, "grad_norm": 1.9435078905879588, "learning_rate": 1.49945233371041e-06, "loss": 0.679, "step": 11787 }, { "epoch": 0.75, "grad_norm": 1.6879784001077494, "learning_rate": 1.4987122714956177e-06, "loss": 0.6681, "step": 11788 }, { "epoch": 0.75, "grad_norm": 1.5427721061959756, "learning_rate": 1.497972359758813e-06, "loss": 0.5721, "step": 11789 }, { "epoch": 0.75, "grad_norm": 1.6726568286375458, "learning_rate": 1.4972325985318009e-06, "loss": 0.6595, "step": 11790 }, { "epoch": 0.75, "grad_norm": 1.60269101215464, "learning_rate": 1.4964929878463685e-06, "loss": 0.6548, "step": 11791 }, { "epoch": 0.75, "grad_norm": 2.1672537718390417, "learning_rate": 1.4957535277343083e-06, "loss": 0.5883, "step": 11792 }, { "epoch": 0.75, "grad_norm": 1.589893653351426, "learning_rate": 1.4950142182273947e-06, "loss": 0.6956, "step": 11793 }, { "epoch": 0.75, "grad_norm": 1.4482550874724942, "learning_rate": 1.4942750593574073e-06, "loss": 0.6968, "step": 11794 }, { "epoch": 0.75, "grad_norm": 1.4626804999971983, "learning_rate": 1.493536051156111e-06, "loss": 0.6825, "step": 11795 }, { "epoch": 0.76, "grad_norm": 1.1222941500793115, "learning_rate": 1.4927971936552666e-06, "loss": 0.7218, "step": 11796 }, { "epoch": 0.76, "grad_norm": 1.508160442888374, "learning_rate": 1.4920584868866295e-06, "loss": 0.6062, "step": 11797 }, { "epoch": 0.76, "grad_norm": 1.6314294131133864, "learning_rate": 1.4913199308819458e-06, "loss": 0.6803, "step": 11798 }, { "epoch": 0.76, "grad_norm": 1.3797526333309373, "learning_rate": 1.4905815256729621e-06, "loss": 0.5836, "step": 11799 }, { "epoch": 0.76, "grad_norm": 1.7452442520341058, "learning_rate": 1.4898432712914074e-06, "loss": 0.7378, "step": 11800 }, { "epoch": 0.76, "grad_norm": 1.4675556213692509, "learning_rate": 1.4891051677690156e-06, "loss": 0.6168, "step": 11801 }, { "epoch": 0.76, "grad_norm": 1.4566957807750252, "learning_rate": 1.4883672151375029e-06, "loss": 0.6607, "step": 11802 }, { "epoch": 0.76, "grad_norm": 1.507503668858242, "learning_rate": 1.4876294134285902e-06, "loss": 0.7335, "step": 11803 }, { "epoch": 0.76, "grad_norm": 1.4850964352432887, "learning_rate": 1.4868917626739848e-06, "loss": 0.6293, "step": 11804 }, { "epoch": 0.76, "grad_norm": 1.6928569645882539, "learning_rate": 1.4861542629053882e-06, "loss": 0.6983, "step": 11805 }, { "epoch": 0.76, "grad_norm": 1.395342775457352, "learning_rate": 1.4854169141545004e-06, "loss": 0.6042, "step": 11806 }, { "epoch": 0.76, "grad_norm": 1.7262824664283782, "learning_rate": 1.4846797164530051e-06, "loss": 0.6324, "step": 11807 }, { "epoch": 0.76, "grad_norm": 1.4953649264903515, "learning_rate": 1.4839426698325927e-06, "loss": 0.6786, "step": 11808 }, { "epoch": 0.76, "grad_norm": 1.1992420592063147, "learning_rate": 1.4832057743249329e-06, "loss": 0.75, "step": 11809 }, { "epoch": 0.76, "grad_norm": 1.4678651853273323, "learning_rate": 1.4824690299617e-06, "loss": 0.5615, "step": 11810 }, { "epoch": 0.76, "grad_norm": 1.0312518628446647, "learning_rate": 1.4817324367745573e-06, "loss": 0.6721, "step": 11811 }, { "epoch": 0.76, "grad_norm": 1.4467419440538762, "learning_rate": 1.4809959947951602e-06, "loss": 0.6583, "step": 11812 }, { "epoch": 0.76, "grad_norm": 1.564117735790171, "learning_rate": 1.4802597040551636e-06, "loss": 0.6529, "step": 11813 }, { "epoch": 0.76, "grad_norm": 1.0917432964542386, "learning_rate": 1.479523564586206e-06, "loss": 0.667, "step": 11814 }, { "epoch": 0.76, "grad_norm": 1.5120437515154586, "learning_rate": 1.4787875764199312e-06, "loss": 0.673, "step": 11815 }, { "epoch": 0.76, "grad_norm": 1.7643609819862824, "learning_rate": 1.478051739587964e-06, "loss": 0.6553, "step": 11816 }, { "epoch": 0.76, "grad_norm": 1.54317073578976, "learning_rate": 1.4773160541219338e-06, "loss": 0.6153, "step": 11817 }, { "epoch": 0.76, "grad_norm": 1.4405446757304743, "learning_rate": 1.4765805200534578e-06, "loss": 0.6882, "step": 11818 }, { "epoch": 0.76, "grad_norm": 1.1178855224358393, "learning_rate": 1.4758451374141469e-06, "loss": 0.6877, "step": 11819 }, { "epoch": 0.76, "grad_norm": 1.5438964102876063, "learning_rate": 1.4751099062356073e-06, "loss": 0.5842, "step": 11820 }, { "epoch": 0.76, "grad_norm": 1.672344083900399, "learning_rate": 1.474374826549435e-06, "loss": 0.5937, "step": 11821 }, { "epoch": 0.76, "grad_norm": 1.145724714213181, "learning_rate": 1.473639898387228e-06, "loss": 0.5909, "step": 11822 }, { "epoch": 0.76, "grad_norm": 1.5297401500126504, "learning_rate": 1.4729051217805645e-06, "loss": 0.5633, "step": 11823 }, { "epoch": 0.76, "grad_norm": 1.5843687658637775, "learning_rate": 1.4721704967610294e-06, "loss": 0.761, "step": 11824 }, { "epoch": 0.76, "grad_norm": 1.6433132200396166, "learning_rate": 1.4714360233601933e-06, "loss": 0.6447, "step": 11825 }, { "epoch": 0.76, "grad_norm": 1.6637661036997378, "learning_rate": 1.470701701609622e-06, "loss": 0.6635, "step": 11826 }, { "epoch": 0.76, "grad_norm": 1.735171451341457, "learning_rate": 1.4699675315408756e-06, "loss": 0.6074, "step": 11827 }, { "epoch": 0.76, "grad_norm": 1.5869933645723509, "learning_rate": 1.4692335131855074e-06, "loss": 0.605, "step": 11828 }, { "epoch": 0.76, "grad_norm": 1.6033891836763254, "learning_rate": 1.468499646575064e-06, "loss": 0.6481, "step": 11829 }, { "epoch": 0.76, "grad_norm": 1.619897549993014, "learning_rate": 1.4677659317410826e-06, "loss": 0.6913, "step": 11830 }, { "epoch": 0.76, "grad_norm": 1.4876519592889614, "learning_rate": 1.4670323687151012e-06, "loss": 0.6144, "step": 11831 }, { "epoch": 0.76, "grad_norm": 1.8325149342114933, "learning_rate": 1.466298957528645e-06, "loss": 0.7433, "step": 11832 }, { "epoch": 0.76, "grad_norm": 1.2728389707491536, "learning_rate": 1.4655656982132338e-06, "loss": 0.6487, "step": 11833 }, { "epoch": 0.76, "grad_norm": 1.7747587094473343, "learning_rate": 1.4648325908003824e-06, "loss": 0.6721, "step": 11834 }, { "epoch": 0.76, "grad_norm": 1.4296572497855873, "learning_rate": 1.464099635321598e-06, "loss": 0.7476, "step": 11835 }, { "epoch": 0.76, "grad_norm": 1.7567288048210685, "learning_rate": 1.463366831808381e-06, "loss": 0.6092, "step": 11836 }, { "epoch": 0.76, "grad_norm": 1.5544257728082238, "learning_rate": 1.4626341802922262e-06, "loss": 0.6079, "step": 11837 }, { "epoch": 0.76, "grad_norm": 1.0555543435377104, "learning_rate": 1.461901680804621e-06, "loss": 0.7083, "step": 11838 }, { "epoch": 0.76, "grad_norm": 1.5812619425766552, "learning_rate": 1.461169333377047e-06, "loss": 0.7428, "step": 11839 }, { "epoch": 0.76, "grad_norm": 2.162591509986492, "learning_rate": 1.4604371380409776e-06, "loss": 0.7138, "step": 11840 }, { "epoch": 0.76, "grad_norm": 1.4490980501980273, "learning_rate": 1.459705094827884e-06, "loss": 0.5753, "step": 11841 }, { "epoch": 0.76, "grad_norm": 1.910011539444729, "learning_rate": 1.4589732037692262e-06, "loss": 0.6734, "step": 11842 }, { "epoch": 0.76, "grad_norm": 1.7502201164785471, "learning_rate": 1.4582414648964594e-06, "loss": 0.6357, "step": 11843 }, { "epoch": 0.76, "grad_norm": 1.7694417942078655, "learning_rate": 1.4575098782410324e-06, "loss": 0.6127, "step": 11844 }, { "epoch": 0.76, "grad_norm": 1.5950492791138995, "learning_rate": 1.4567784438343868e-06, "loss": 0.7261, "step": 11845 }, { "epoch": 0.76, "grad_norm": 1.3835975255521915, "learning_rate": 1.456047161707958e-06, "loss": 0.5943, "step": 11846 }, { "epoch": 0.76, "grad_norm": 1.44143090858266, "learning_rate": 1.455316031893174e-06, "loss": 0.713, "step": 11847 }, { "epoch": 0.76, "grad_norm": 1.1863926225936805, "learning_rate": 1.4545850544214618e-06, "loss": 0.6516, "step": 11848 }, { "epoch": 0.76, "grad_norm": 1.7662078788532767, "learning_rate": 1.4538542293242307e-06, "loss": 0.7052, "step": 11849 }, { "epoch": 0.76, "grad_norm": 1.5966380024035085, "learning_rate": 1.4531235566328954e-06, "loss": 0.6704, "step": 11850 }, { "epoch": 0.76, "grad_norm": 1.4962522750799687, "learning_rate": 1.4523930363788562e-06, "loss": 0.6108, "step": 11851 }, { "epoch": 0.76, "grad_norm": 1.2307310782703949, "learning_rate": 1.4516626685935097e-06, "loss": 0.6522, "step": 11852 }, { "epoch": 0.76, "grad_norm": 1.5755868435959253, "learning_rate": 1.450932453308246e-06, "loss": 0.6044, "step": 11853 }, { "epoch": 0.76, "grad_norm": 1.379799093934755, "learning_rate": 1.450202390554446e-06, "loss": 0.5654, "step": 11854 }, { "epoch": 0.76, "grad_norm": 1.5951415905642015, "learning_rate": 1.4494724803634912e-06, "loss": 0.6679, "step": 11855 }, { "epoch": 0.76, "grad_norm": 1.7273077057359851, "learning_rate": 1.4487427227667456e-06, "loss": 0.7548, "step": 11856 }, { "epoch": 0.76, "grad_norm": 1.6690468157980343, "learning_rate": 1.448013117795578e-06, "loss": 0.705, "step": 11857 }, { "epoch": 0.76, "grad_norm": 1.4588662905613585, "learning_rate": 1.4472836654813405e-06, "loss": 0.595, "step": 11858 }, { "epoch": 0.76, "grad_norm": 1.5033221540101736, "learning_rate": 1.446554365855387e-06, "loss": 0.6437, "step": 11859 }, { "epoch": 0.76, "grad_norm": 1.6663604819690552, "learning_rate": 1.4458252189490602e-06, "loss": 0.6638, "step": 11860 }, { "epoch": 0.76, "grad_norm": 1.3386160757422336, "learning_rate": 1.445096224793695e-06, "loss": 0.6221, "step": 11861 }, { "epoch": 0.76, "grad_norm": 1.6739212117380053, "learning_rate": 1.444367383420628e-06, "loss": 0.7083, "step": 11862 }, { "epoch": 0.76, "grad_norm": 1.7551043742687462, "learning_rate": 1.4436386948611763e-06, "loss": 0.7304, "step": 11863 }, { "epoch": 0.76, "grad_norm": 2.1928818117039515, "learning_rate": 1.4429101591466632e-06, "loss": 0.6285, "step": 11864 }, { "epoch": 0.76, "grad_norm": 1.5783804510157262, "learning_rate": 1.442181776308394e-06, "loss": 0.6221, "step": 11865 }, { "epoch": 0.76, "grad_norm": 1.5950405923139612, "learning_rate": 1.441453546377678e-06, "loss": 0.6425, "step": 11866 }, { "epoch": 0.76, "grad_norm": 1.4982137973284266, "learning_rate": 1.4407254693858108e-06, "loss": 0.7257, "step": 11867 }, { "epoch": 0.76, "grad_norm": 1.437343149114835, "learning_rate": 1.4399975453640824e-06, "loss": 0.6258, "step": 11868 }, { "epoch": 0.76, "grad_norm": 1.7629771267672536, "learning_rate": 1.4392697743437816e-06, "loss": 0.6505, "step": 11869 }, { "epoch": 0.76, "grad_norm": 1.6880208319091572, "learning_rate": 1.4385421563561808e-06, "loss": 0.7333, "step": 11870 }, { "epoch": 0.76, "grad_norm": 1.5094039709235567, "learning_rate": 1.437814691432558e-06, "loss": 0.6275, "step": 11871 }, { "epoch": 0.76, "grad_norm": 1.4701021504892666, "learning_rate": 1.4370873796041713e-06, "loss": 0.5954, "step": 11872 }, { "epoch": 0.76, "grad_norm": 1.6086333990376163, "learning_rate": 1.4363602209022837e-06, "loss": 0.6903, "step": 11873 }, { "epoch": 0.76, "grad_norm": 1.6351536711419516, "learning_rate": 1.4356332153581454e-06, "loss": 0.5974, "step": 11874 }, { "epoch": 0.76, "grad_norm": 1.5782919725310105, "learning_rate": 1.4349063630030018e-06, "loss": 0.6251, "step": 11875 }, { "epoch": 0.76, "grad_norm": 1.4667279454865207, "learning_rate": 1.434179663868092e-06, "loss": 0.5894, "step": 11876 }, { "epoch": 0.76, "grad_norm": 1.4637316368838487, "learning_rate": 1.4334531179846455e-06, "loss": 0.6095, "step": 11877 }, { "epoch": 0.76, "grad_norm": 1.4852002039541206, "learning_rate": 1.4327267253838928e-06, "loss": 0.6386, "step": 11878 }, { "epoch": 0.76, "grad_norm": 1.6681422607126002, "learning_rate": 1.432000486097046e-06, "loss": 0.6076, "step": 11879 }, { "epoch": 0.76, "grad_norm": 1.6863505090803763, "learning_rate": 1.4312744001553247e-06, "loss": 0.6799, "step": 11880 }, { "epoch": 0.76, "grad_norm": 1.5584525372516453, "learning_rate": 1.4305484675899272e-06, "loss": 0.6391, "step": 11881 }, { "epoch": 0.76, "grad_norm": 1.398035920495314, "learning_rate": 1.4298226884320577e-06, "loss": 0.6006, "step": 11882 }, { "epoch": 0.76, "grad_norm": 1.3476316201574787, "learning_rate": 1.4290970627129075e-06, "loss": 0.7138, "step": 11883 }, { "epoch": 0.76, "grad_norm": 1.4470629696779431, "learning_rate": 1.4283715904636614e-06, "loss": 0.6175, "step": 11884 }, { "epoch": 0.76, "grad_norm": 1.6565626225836316, "learning_rate": 1.4276462717154999e-06, "loss": 0.6628, "step": 11885 }, { "epoch": 0.76, "grad_norm": 1.3975591760584534, "learning_rate": 1.4269211064995931e-06, "loss": 0.576, "step": 11886 }, { "epoch": 0.76, "grad_norm": 1.6137944206839534, "learning_rate": 1.4261960948471122e-06, "loss": 0.6438, "step": 11887 }, { "epoch": 0.76, "grad_norm": 1.6046614932354653, "learning_rate": 1.4254712367892109e-06, "loss": 0.6879, "step": 11888 }, { "epoch": 0.76, "grad_norm": 1.856414512394266, "learning_rate": 1.424746532357046e-06, "loss": 0.7765, "step": 11889 }, { "epoch": 0.76, "grad_norm": 1.116746253464961, "learning_rate": 1.4240219815817624e-06, "loss": 0.6564, "step": 11890 }, { "epoch": 0.76, "grad_norm": 1.499650009321799, "learning_rate": 1.4232975844944997e-06, "loss": 0.6467, "step": 11891 }, { "epoch": 0.76, "grad_norm": 1.3883706585948214, "learning_rate": 1.4225733411263914e-06, "loss": 0.6265, "step": 11892 }, { "epoch": 0.76, "grad_norm": 1.4876234797882737, "learning_rate": 1.4218492515085636e-06, "loss": 0.6049, "step": 11893 }, { "epoch": 0.76, "grad_norm": 1.8447784080846612, "learning_rate": 1.4211253156721366e-06, "loss": 0.6833, "step": 11894 }, { "epoch": 0.76, "grad_norm": 1.707797394989243, "learning_rate": 1.4204015336482213e-06, "loss": 0.7369, "step": 11895 }, { "epoch": 0.76, "grad_norm": 1.8965266697187806, "learning_rate": 1.4196779054679276e-06, "loss": 0.7378, "step": 11896 }, { "epoch": 0.76, "grad_norm": 1.3589645477785746, "learning_rate": 1.418954431162355e-06, "loss": 0.6457, "step": 11897 }, { "epoch": 0.76, "grad_norm": 3.3131506584269306, "learning_rate": 1.4182311107625956e-06, "loss": 0.6226, "step": 11898 }, { "epoch": 0.76, "grad_norm": 1.0415764465183694, "learning_rate": 1.417507944299737e-06, "loss": 0.6374, "step": 11899 }, { "epoch": 0.76, "grad_norm": 1.627631812976303, "learning_rate": 1.4167849318048588e-06, "loss": 0.6028, "step": 11900 }, { "epoch": 0.76, "grad_norm": 1.7016581741540455, "learning_rate": 1.4160620733090351e-06, "loss": 0.6995, "step": 11901 }, { "epoch": 0.76, "grad_norm": 2.058992798191067, "learning_rate": 1.4153393688433326e-06, "loss": 0.6125, "step": 11902 }, { "epoch": 0.76, "grad_norm": 1.513342770488941, "learning_rate": 1.4146168184388099e-06, "loss": 0.6653, "step": 11903 }, { "epoch": 0.76, "grad_norm": 1.628903567045002, "learning_rate": 1.4138944221265243e-06, "loss": 0.7525, "step": 11904 }, { "epoch": 0.76, "grad_norm": 1.6478811200587933, "learning_rate": 1.413172179937521e-06, "loss": 0.699, "step": 11905 }, { "epoch": 0.76, "grad_norm": 1.4553041727953024, "learning_rate": 1.41245009190284e-06, "loss": 0.6316, "step": 11906 }, { "epoch": 0.76, "grad_norm": 1.6494702467944258, "learning_rate": 1.4117281580535158e-06, "loss": 0.5722, "step": 11907 }, { "epoch": 0.76, "grad_norm": 1.4343566321553305, "learning_rate": 1.4110063784205751e-06, "loss": 0.7127, "step": 11908 }, { "epoch": 0.76, "grad_norm": 1.7478602002665218, "learning_rate": 1.410284753035039e-06, "loss": 0.7108, "step": 11909 }, { "epoch": 0.76, "grad_norm": 1.5203221821300483, "learning_rate": 1.409563281927921e-06, "loss": 0.6955, "step": 11910 }, { "epoch": 0.76, "grad_norm": 1.3875526838949541, "learning_rate": 1.4088419651302288e-06, "loss": 0.5928, "step": 11911 }, { "epoch": 0.76, "grad_norm": 1.6511206652093446, "learning_rate": 1.4081208026729615e-06, "loss": 0.6999, "step": 11912 }, { "epoch": 0.76, "grad_norm": 1.462714167911395, "learning_rate": 1.407399794587117e-06, "loss": 0.6763, "step": 11913 }, { "epoch": 0.76, "grad_norm": 1.676010697804651, "learning_rate": 1.406678940903678e-06, "loss": 0.6208, "step": 11914 }, { "epoch": 0.76, "grad_norm": 1.5069326033523025, "learning_rate": 1.4059582416536282e-06, "loss": 0.6988, "step": 11915 }, { "epoch": 0.76, "grad_norm": 1.5540424120523857, "learning_rate": 1.4052376968679416e-06, "loss": 0.6768, "step": 11916 }, { "epoch": 0.76, "grad_norm": 1.5801847884618045, "learning_rate": 1.4045173065775852e-06, "loss": 0.6816, "step": 11917 }, { "epoch": 0.76, "grad_norm": 1.8666507040304134, "learning_rate": 1.4037970708135196e-06, "loss": 0.7184, "step": 11918 }, { "epoch": 0.76, "grad_norm": 1.6183150049447044, "learning_rate": 1.4030769896066975e-06, "loss": 0.6233, "step": 11919 }, { "epoch": 0.76, "grad_norm": 1.565669156549616, "learning_rate": 1.4023570629880718e-06, "loss": 0.5904, "step": 11920 }, { "epoch": 0.76, "grad_norm": 1.6467240249957922, "learning_rate": 1.4016372909885762e-06, "loss": 0.633, "step": 11921 }, { "epoch": 0.76, "grad_norm": 1.9567895334419287, "learning_rate": 1.4009176736391523e-06, "loss": 0.6445, "step": 11922 }, { "epoch": 0.76, "grad_norm": 1.5867633810905177, "learning_rate": 1.4001982109707201e-06, "loss": 0.5824, "step": 11923 }, { "epoch": 0.76, "grad_norm": 1.8182248687112252, "learning_rate": 1.399478903014207e-06, "loss": 0.7756, "step": 11924 }, { "epoch": 0.76, "grad_norm": 1.5661942844355226, "learning_rate": 1.3987597498005245e-06, "loss": 0.6382, "step": 11925 }, { "epoch": 0.76, "grad_norm": 1.7979549810177955, "learning_rate": 1.3980407513605793e-06, "loss": 0.7073, "step": 11926 }, { "epoch": 0.76, "grad_norm": 1.2991971442319674, "learning_rate": 1.397321907725277e-06, "loss": 0.5609, "step": 11927 }, { "epoch": 0.76, "grad_norm": 1.379624910193474, "learning_rate": 1.3966032189255058e-06, "loss": 0.6548, "step": 11928 }, { "epoch": 0.76, "grad_norm": 1.0152834055419795, "learning_rate": 1.3958846849921593e-06, "loss": 0.6909, "step": 11929 }, { "epoch": 0.76, "grad_norm": 1.695879964143004, "learning_rate": 1.3951663059561126e-06, "loss": 0.6672, "step": 11930 }, { "epoch": 0.76, "grad_norm": 1.6040121835237822, "learning_rate": 1.3944480818482448e-06, "loss": 0.6252, "step": 11931 }, { "epoch": 0.76, "grad_norm": 1.2765732997538703, "learning_rate": 1.3937300126994223e-06, "loss": 0.6473, "step": 11932 }, { "epoch": 0.76, "grad_norm": 1.6147258232256148, "learning_rate": 1.393012098540505e-06, "loss": 0.6537, "step": 11933 }, { "epoch": 0.76, "grad_norm": 1.662670371676068, "learning_rate": 1.392294339402351e-06, "loss": 0.6117, "step": 11934 }, { "epoch": 0.76, "grad_norm": 1.4553076869913553, "learning_rate": 1.3915767353158022e-06, "loss": 0.6706, "step": 11935 }, { "epoch": 0.76, "grad_norm": 1.7025200300656167, "learning_rate": 1.390859286311706e-06, "loss": 0.6165, "step": 11936 }, { "epoch": 0.76, "grad_norm": 1.3927628816989277, "learning_rate": 1.3901419924208908e-06, "loss": 0.5955, "step": 11937 }, { "epoch": 0.76, "grad_norm": 1.868107941937127, "learning_rate": 1.389424853674189e-06, "loss": 0.7151, "step": 11938 }, { "epoch": 0.76, "grad_norm": 1.4313648107960655, "learning_rate": 1.3887078701024204e-06, "loss": 0.6502, "step": 11939 }, { "epoch": 0.76, "grad_norm": 1.0424432842336504, "learning_rate": 1.387991041736399e-06, "loss": 0.6152, "step": 11940 }, { "epoch": 0.76, "grad_norm": 1.5185875181011268, "learning_rate": 1.3872743686069328e-06, "loss": 0.7248, "step": 11941 }, { "epoch": 0.76, "grad_norm": 1.4839598462800405, "learning_rate": 1.3865578507448212e-06, "loss": 0.6609, "step": 11942 }, { "epoch": 0.76, "grad_norm": 1.4386926413760417, "learning_rate": 1.3858414881808634e-06, "loss": 0.6081, "step": 11943 }, { "epoch": 0.76, "grad_norm": 1.5602463130383986, "learning_rate": 1.385125280945841e-06, "loss": 0.6271, "step": 11944 }, { "epoch": 0.76, "grad_norm": 1.6286503140951725, "learning_rate": 1.3844092290705396e-06, "loss": 0.7604, "step": 11945 }, { "epoch": 0.76, "grad_norm": 1.6096372940085253, "learning_rate": 1.3836933325857321e-06, "loss": 0.6259, "step": 11946 }, { "epoch": 0.76, "grad_norm": 1.4900817137898734, "learning_rate": 1.382977591522186e-06, "loss": 0.705, "step": 11947 }, { "epoch": 0.76, "grad_norm": 0.9419648587849175, "learning_rate": 1.3822620059106633e-06, "loss": 0.5648, "step": 11948 }, { "epoch": 0.76, "grad_norm": 1.5721827451570303, "learning_rate": 1.3815465757819174e-06, "loss": 0.5505, "step": 11949 }, { "epoch": 0.76, "grad_norm": 1.5288007349613988, "learning_rate": 1.3808313011666958e-06, "loss": 0.651, "step": 11950 }, { "epoch": 0.76, "grad_norm": 1.9067818044947697, "learning_rate": 1.3801161820957386e-06, "loss": 0.7177, "step": 11951 }, { "epoch": 0.77, "grad_norm": 1.619356043668964, "learning_rate": 1.3794012185997851e-06, "loss": 0.6022, "step": 11952 }, { "epoch": 0.77, "grad_norm": 1.273973994162406, "learning_rate": 1.378686410709556e-06, "loss": 0.6317, "step": 11953 }, { "epoch": 0.77, "grad_norm": 1.717076741542289, "learning_rate": 1.3779717584557766e-06, "loss": 0.6224, "step": 11954 }, { "epoch": 0.77, "grad_norm": 1.543803291632802, "learning_rate": 1.3772572618691604e-06, "loss": 0.6449, "step": 11955 }, { "epoch": 0.77, "grad_norm": 1.1153552840425507, "learning_rate": 1.376542920980415e-06, "loss": 0.6638, "step": 11956 }, { "epoch": 0.77, "grad_norm": 1.638459913011273, "learning_rate": 1.3758287358202404e-06, "loss": 0.7314, "step": 11957 }, { "epoch": 0.77, "grad_norm": 1.5183161741948592, "learning_rate": 1.3751147064193315e-06, "loss": 0.6732, "step": 11958 }, { "epoch": 0.77, "grad_norm": 1.660115386809336, "learning_rate": 1.3744008328083758e-06, "loss": 0.6386, "step": 11959 }, { "epoch": 0.77, "grad_norm": 0.9572796740702513, "learning_rate": 1.373687115018052e-06, "loss": 0.547, "step": 11960 }, { "epoch": 0.77, "grad_norm": 1.460385789373179, "learning_rate": 1.3729735530790378e-06, "loss": 0.7408, "step": 11961 }, { "epoch": 0.77, "grad_norm": 1.5550722455136285, "learning_rate": 1.3722601470219986e-06, "loss": 0.6323, "step": 11962 }, { "epoch": 0.77, "grad_norm": 1.180463731453268, "learning_rate": 1.3715468968775952e-06, "loss": 0.6555, "step": 11963 }, { "epoch": 0.77, "grad_norm": 1.4590675998200204, "learning_rate": 1.3708338026764823e-06, "loss": 0.6772, "step": 11964 }, { "epoch": 0.77, "grad_norm": 1.5541747814900726, "learning_rate": 1.3701208644493064e-06, "loss": 0.7498, "step": 11965 }, { "epoch": 0.77, "grad_norm": 1.74527642856842, "learning_rate": 1.369408082226708e-06, "loss": 0.7335, "step": 11966 }, { "epoch": 0.77, "grad_norm": 1.5463606572039856, "learning_rate": 1.3686954560393218e-06, "loss": 0.6513, "step": 11967 }, { "epoch": 0.77, "grad_norm": 1.4014984189292086, "learning_rate": 1.367982985917773e-06, "loss": 0.6314, "step": 11968 }, { "epoch": 0.77, "grad_norm": 1.6132940968828504, "learning_rate": 1.3672706718926849e-06, "loss": 0.6893, "step": 11969 }, { "epoch": 0.77, "grad_norm": 1.8757058225054553, "learning_rate": 1.3665585139946697e-06, "loss": 0.7586, "step": 11970 }, { "epoch": 0.77, "grad_norm": 0.9858708346365275, "learning_rate": 1.3658465122543346e-06, "loss": 0.6963, "step": 11971 }, { "epoch": 0.77, "grad_norm": 1.7969490126007737, "learning_rate": 1.3651346667022801e-06, "loss": 0.6553, "step": 11972 }, { "epoch": 0.77, "grad_norm": 1.068461253848747, "learning_rate": 1.3644229773690997e-06, "loss": 0.6105, "step": 11973 }, { "epoch": 0.77, "grad_norm": 1.0598081769965608, "learning_rate": 1.363711444285381e-06, "loss": 0.6504, "step": 11974 }, { "epoch": 0.77, "grad_norm": 1.771851738007924, "learning_rate": 1.3630000674817011e-06, "loss": 0.6847, "step": 11975 }, { "epoch": 0.77, "grad_norm": 1.4364803425494495, "learning_rate": 1.3622888469886391e-06, "loss": 0.694, "step": 11976 }, { "epoch": 0.77, "grad_norm": 1.6318233153723494, "learning_rate": 1.361577782836756e-06, "loss": 0.6626, "step": 11977 }, { "epoch": 0.77, "grad_norm": 1.2005250454760539, "learning_rate": 1.3608668750566157e-06, "loss": 0.7744, "step": 11978 }, { "epoch": 0.77, "grad_norm": 1.59685488271598, "learning_rate": 1.3601561236787702e-06, "loss": 0.6105, "step": 11979 }, { "epoch": 0.77, "grad_norm": 1.9721278750450302, "learning_rate": 1.359445528733766e-06, "loss": 0.6965, "step": 11980 }, { "epoch": 0.77, "grad_norm": 1.6305802967698002, "learning_rate": 1.3587350902521435e-06, "loss": 0.6839, "step": 11981 }, { "epoch": 0.77, "grad_norm": 1.2745070908426595, "learning_rate": 1.3580248082644337e-06, "loss": 0.629, "step": 11982 }, { "epoch": 0.77, "grad_norm": 1.1047832005637335, "learning_rate": 1.357314682801168e-06, "loss": 0.6874, "step": 11983 }, { "epoch": 0.77, "grad_norm": 0.9771634028309321, "learning_rate": 1.3566047138928594e-06, "loss": 0.6338, "step": 11984 }, { "epoch": 0.77, "grad_norm": 1.4758116748708228, "learning_rate": 1.3558949015700278e-06, "loss": 0.5815, "step": 11985 }, { "epoch": 0.77, "grad_norm": 1.8796318937510732, "learning_rate": 1.355185245863173e-06, "loss": 0.6127, "step": 11986 }, { "epoch": 0.77, "grad_norm": 1.538643271822942, "learning_rate": 1.3544757468027986e-06, "loss": 0.6353, "step": 11987 }, { "epoch": 0.77, "grad_norm": 1.3904984342766022, "learning_rate": 1.3537664044193965e-06, "loss": 0.611, "step": 11988 }, { "epoch": 0.77, "grad_norm": 1.6226833811900658, "learning_rate": 1.3530572187434531e-06, "loss": 0.5987, "step": 11989 }, { "epoch": 0.77, "grad_norm": 1.614667746006591, "learning_rate": 1.3523481898054463e-06, "loss": 0.6937, "step": 11990 }, { "epoch": 0.77, "grad_norm": 1.6696562956939096, "learning_rate": 1.351639317635849e-06, "loss": 0.6277, "step": 11991 }, { "epoch": 0.77, "grad_norm": 1.6285218194037667, "learning_rate": 1.3509306022651297e-06, "loss": 0.6288, "step": 11992 }, { "epoch": 0.77, "grad_norm": 1.4165390590505542, "learning_rate": 1.3502220437237429e-06, "loss": 0.595, "step": 11993 }, { "epoch": 0.77, "grad_norm": 1.5340984886625153, "learning_rate": 1.3495136420421461e-06, "loss": 0.7138, "step": 11994 }, { "epoch": 0.77, "grad_norm": 1.1236433224228095, "learning_rate": 1.3488053972507792e-06, "loss": 0.5977, "step": 11995 }, { "epoch": 0.77, "grad_norm": 1.1557180349449596, "learning_rate": 1.3480973093800859e-06, "loss": 0.6982, "step": 11996 }, { "epoch": 0.77, "grad_norm": 1.4835828170166487, "learning_rate": 1.3473893784604963e-06, "loss": 0.6492, "step": 11997 }, { "epoch": 0.77, "grad_norm": 1.5120010805962687, "learning_rate": 1.3466816045224346e-06, "loss": 0.569, "step": 11998 }, { "epoch": 0.77, "grad_norm": 1.476914884001383, "learning_rate": 1.345973987596324e-06, "loss": 0.7129, "step": 11999 }, { "epoch": 0.77, "grad_norm": 1.4954554644007279, "learning_rate": 1.3452665277125697e-06, "loss": 0.6721, "step": 12000 }, { "epoch": 0.77, "grad_norm": 1.580800217476827, "learning_rate": 1.3445592249015843e-06, "loss": 0.71, "step": 12001 }, { "epoch": 0.77, "grad_norm": 1.5653739685917738, "learning_rate": 1.3438520791937587e-06, "loss": 0.6742, "step": 12002 }, { "epoch": 0.77, "grad_norm": 1.4836670860095185, "learning_rate": 1.3431450906194892e-06, "loss": 0.6131, "step": 12003 }, { "epoch": 0.77, "grad_norm": 1.121373289897147, "learning_rate": 1.3424382592091606e-06, "loss": 0.6304, "step": 12004 }, { "epoch": 0.77, "grad_norm": 1.43397861962328, "learning_rate": 1.3417315849931495e-06, "loss": 0.6875, "step": 12005 }, { "epoch": 0.77, "grad_norm": 1.8361890874110067, "learning_rate": 1.3410250680018277e-06, "loss": 0.6719, "step": 12006 }, { "epoch": 0.77, "grad_norm": 1.475524821674537, "learning_rate": 1.3403187082655584e-06, "loss": 0.6826, "step": 12007 }, { "epoch": 0.77, "grad_norm": 1.5604468842453714, "learning_rate": 1.3396125058147042e-06, "loss": 0.632, "step": 12008 }, { "epoch": 0.77, "grad_norm": 1.5596475420257085, "learning_rate": 1.3389064606796098e-06, "loss": 0.6595, "step": 12009 }, { "epoch": 0.77, "grad_norm": 1.5577986929082284, "learning_rate": 1.338200572890624e-06, "loss": 0.6535, "step": 12010 }, { "epoch": 0.77, "grad_norm": 1.4112684973111638, "learning_rate": 1.3374948424780836e-06, "loss": 0.5533, "step": 12011 }, { "epoch": 0.77, "grad_norm": 1.5456491342913772, "learning_rate": 1.3367892694723183e-06, "loss": 0.612, "step": 12012 }, { "epoch": 0.77, "grad_norm": 2.9227408630362732, "learning_rate": 1.336083853903653e-06, "loss": 0.5771, "step": 12013 }, { "epoch": 0.77, "grad_norm": 1.5977568329156524, "learning_rate": 1.3353785958024052e-06, "loss": 0.6899, "step": 12014 }, { "epoch": 0.77, "grad_norm": 1.895685328785089, "learning_rate": 1.3346734951988844e-06, "loss": 0.6225, "step": 12015 }, { "epoch": 0.77, "grad_norm": 1.444343394563671, "learning_rate": 1.3339685521233931e-06, "loss": 0.6045, "step": 12016 }, { "epoch": 0.77, "grad_norm": 1.6572503275729213, "learning_rate": 1.333263766606232e-06, "loss": 0.6691, "step": 12017 }, { "epoch": 0.77, "grad_norm": 1.560138705298311, "learning_rate": 1.3325591386776892e-06, "loss": 0.6461, "step": 12018 }, { "epoch": 0.77, "grad_norm": 1.1406676106627183, "learning_rate": 1.3318546683680483e-06, "loss": 0.5778, "step": 12019 }, { "epoch": 0.77, "grad_norm": 1.5656925636618624, "learning_rate": 1.3311503557075862e-06, "loss": 0.6619, "step": 12020 }, { "epoch": 0.77, "grad_norm": 1.7128598529392713, "learning_rate": 1.3304462007265716e-06, "loss": 0.6723, "step": 12021 }, { "epoch": 0.77, "grad_norm": 1.3262757833589587, "learning_rate": 1.3297422034552692e-06, "loss": 0.6585, "step": 12022 }, { "epoch": 0.77, "grad_norm": 1.0689872186470015, "learning_rate": 1.3290383639239347e-06, "loss": 0.7037, "step": 12023 }, { "epoch": 0.77, "grad_norm": 1.5739684294142493, "learning_rate": 1.3283346821628157e-06, "loss": 0.7074, "step": 12024 }, { "epoch": 0.77, "grad_norm": 1.0038415897913904, "learning_rate": 1.3276311582021583e-06, "loss": 0.638, "step": 12025 }, { "epoch": 0.77, "grad_norm": 1.5633638732530615, "learning_rate": 1.3269277920721975e-06, "loss": 0.6524, "step": 12026 }, { "epoch": 0.77, "grad_norm": 1.580611902370099, "learning_rate": 1.3262245838031618e-06, "loss": 0.6587, "step": 12027 }, { "epoch": 0.77, "grad_norm": 1.428474812158143, "learning_rate": 1.3255215334252736e-06, "loss": 0.5958, "step": 12028 }, { "epoch": 0.77, "grad_norm": 1.7131631745769786, "learning_rate": 1.3248186409687491e-06, "loss": 0.6786, "step": 12029 }, { "epoch": 0.77, "grad_norm": 1.6593808289431715, "learning_rate": 1.3241159064637965e-06, "loss": 0.6314, "step": 12030 }, { "epoch": 0.77, "grad_norm": 1.49481340738099, "learning_rate": 1.3234133299406183e-06, "loss": 0.5986, "step": 12031 }, { "epoch": 0.77, "grad_norm": 1.7519843206777541, "learning_rate": 1.3227109114294096e-06, "loss": 0.6993, "step": 12032 }, { "epoch": 0.77, "grad_norm": 1.5748300243747486, "learning_rate": 1.3220086509603569e-06, "loss": 0.6328, "step": 12033 }, { "epoch": 0.77, "grad_norm": 1.5804089348506494, "learning_rate": 1.3213065485636462e-06, "loss": 0.7326, "step": 12034 }, { "epoch": 0.77, "grad_norm": 3.288388114793739, "learning_rate": 1.3206046042694493e-06, "loss": 0.595, "step": 12035 }, { "epoch": 0.77, "grad_norm": 1.6257993793115717, "learning_rate": 1.3199028181079354e-06, "loss": 0.6611, "step": 12036 }, { "epoch": 0.77, "grad_norm": 1.3808825472778579, "learning_rate": 1.3192011901092654e-06, "loss": 0.6102, "step": 12037 }, { "epoch": 0.77, "grad_norm": 1.441160130358813, "learning_rate": 1.3184997203035938e-06, "loss": 0.6572, "step": 12038 }, { "epoch": 0.77, "grad_norm": 1.6596179565903308, "learning_rate": 1.3177984087210682e-06, "loss": 0.6122, "step": 12039 }, { "epoch": 0.77, "grad_norm": 1.5488668304760254, "learning_rate": 1.3170972553918283e-06, "loss": 0.6929, "step": 12040 }, { "epoch": 0.77, "grad_norm": 1.6511481857347614, "learning_rate": 1.3163962603460123e-06, "loss": 0.7058, "step": 12041 }, { "epoch": 0.77, "grad_norm": 1.510242863722192, "learning_rate": 1.315695423613742e-06, "loss": 0.6426, "step": 12042 }, { "epoch": 0.77, "grad_norm": 1.6501183656165728, "learning_rate": 1.3149947452251422e-06, "loss": 0.651, "step": 12043 }, { "epoch": 0.77, "grad_norm": 1.535407683763732, "learning_rate": 1.3142942252103246e-06, "loss": 0.6238, "step": 12044 }, { "epoch": 0.77, "grad_norm": 1.4130114234237843, "learning_rate": 1.3135938635993966e-06, "loss": 0.6538, "step": 12045 }, { "epoch": 0.77, "grad_norm": 1.5449868983168598, "learning_rate": 1.312893660422458e-06, "loss": 0.7417, "step": 12046 }, { "epoch": 0.77, "grad_norm": 1.447233770142253, "learning_rate": 1.312193615709601e-06, "loss": 0.6611, "step": 12047 }, { "epoch": 0.77, "grad_norm": 1.458392772223827, "learning_rate": 1.3114937294909164e-06, "loss": 0.7069, "step": 12048 }, { "epoch": 0.77, "grad_norm": 1.0927490105713187, "learning_rate": 1.310794001796477e-06, "loss": 0.6483, "step": 12049 }, { "epoch": 0.77, "grad_norm": 1.4154997577886628, "learning_rate": 1.310094432656363e-06, "loss": 0.5299, "step": 12050 }, { "epoch": 0.77, "grad_norm": 1.8673960081954453, "learning_rate": 1.3093950221006329e-06, "loss": 0.6892, "step": 12051 }, { "epoch": 0.77, "grad_norm": 1.6250617486239038, "learning_rate": 1.3086957701593523e-06, "loss": 0.7162, "step": 12052 }, { "epoch": 0.77, "grad_norm": 2.0668779145731526, "learning_rate": 1.30799667686257e-06, "loss": 0.6902, "step": 12053 }, { "epoch": 0.77, "grad_norm": 1.4845820868586064, "learning_rate": 1.3072977422403317e-06, "loss": 0.5828, "step": 12054 }, { "epoch": 0.77, "grad_norm": 1.545797302729054, "learning_rate": 1.3065989663226797e-06, "loss": 0.6456, "step": 12055 }, { "epoch": 0.77, "grad_norm": 1.504295623190636, "learning_rate": 1.3059003491396405e-06, "loss": 0.5351, "step": 12056 }, { "epoch": 0.77, "grad_norm": 1.7607453793070909, "learning_rate": 1.3052018907212448e-06, "loss": 0.6494, "step": 12057 }, { "epoch": 0.77, "grad_norm": 2.0599061313076206, "learning_rate": 1.3045035910975045e-06, "loss": 0.653, "step": 12058 }, { "epoch": 0.77, "grad_norm": 1.5017884339083973, "learning_rate": 1.303805450298437e-06, "loss": 0.701, "step": 12059 }, { "epoch": 0.77, "grad_norm": 1.465296337435947, "learning_rate": 1.303107468354044e-06, "loss": 0.6232, "step": 12060 }, { "epoch": 0.77, "grad_norm": 1.8823252432365363, "learning_rate": 1.3024096452943236e-06, "loss": 0.64, "step": 12061 }, { "epoch": 0.77, "grad_norm": 1.2955826857679196, "learning_rate": 1.3017119811492668e-06, "loss": 0.5776, "step": 12062 }, { "epoch": 0.77, "grad_norm": 1.582489430799942, "learning_rate": 1.301014475948857e-06, "loss": 0.6791, "step": 12063 }, { "epoch": 0.77, "grad_norm": 1.5350935310512654, "learning_rate": 1.3003171297230748e-06, "loss": 0.6786, "step": 12064 }, { "epoch": 0.77, "grad_norm": 1.6179628183759847, "learning_rate": 1.2996199425018858e-06, "loss": 0.674, "step": 12065 }, { "epoch": 0.77, "grad_norm": 1.11004926714583, "learning_rate": 1.298922914315257e-06, "loss": 0.6054, "step": 12066 }, { "epoch": 0.77, "grad_norm": 1.407175754071815, "learning_rate": 1.2982260451931445e-06, "loss": 0.6072, "step": 12067 }, { "epoch": 0.77, "grad_norm": 1.883207039886754, "learning_rate": 1.2975293351654982e-06, "loss": 0.631, "step": 12068 }, { "epoch": 0.77, "grad_norm": 1.672944150822706, "learning_rate": 1.2968327842622612e-06, "loss": 0.6609, "step": 12069 }, { "epoch": 0.77, "grad_norm": 1.6384707227751671, "learning_rate": 1.2961363925133696e-06, "loss": 0.6582, "step": 12070 }, { "epoch": 0.77, "grad_norm": 1.0587585281105878, "learning_rate": 1.2954401599487531e-06, "loss": 0.6719, "step": 12071 }, { "epoch": 0.77, "grad_norm": 1.648744803882073, "learning_rate": 1.2947440865983323e-06, "loss": 0.7199, "step": 12072 }, { "epoch": 0.77, "grad_norm": 1.613943167704061, "learning_rate": 1.2940481724920284e-06, "loss": 0.7006, "step": 12073 }, { "epoch": 0.77, "grad_norm": 1.75019791406973, "learning_rate": 1.2933524176597434e-06, "loss": 0.727, "step": 12074 }, { "epoch": 0.77, "grad_norm": 1.8184210881644476, "learning_rate": 1.292656822131384e-06, "loss": 0.7228, "step": 12075 }, { "epoch": 0.77, "grad_norm": 1.4932305068419116, "learning_rate": 1.2919613859368446e-06, "loss": 0.6512, "step": 12076 }, { "epoch": 0.77, "grad_norm": 1.386550038334435, "learning_rate": 1.291266109106013e-06, "loss": 0.6184, "step": 12077 }, { "epoch": 0.77, "grad_norm": 3.040570040455413, "learning_rate": 1.290570991668771e-06, "loss": 0.8034, "step": 12078 }, { "epoch": 0.77, "grad_norm": 1.9378963164905896, "learning_rate": 1.2898760336549931e-06, "loss": 0.722, "step": 12079 }, { "epoch": 0.77, "grad_norm": 1.521473723340233, "learning_rate": 1.2891812350945476e-06, "loss": 0.6949, "step": 12080 }, { "epoch": 0.77, "grad_norm": 1.5523341217559814, "learning_rate": 1.2884865960172931e-06, "loss": 0.6228, "step": 12081 }, { "epoch": 0.77, "grad_norm": 1.0387628268716338, "learning_rate": 1.2877921164530872e-06, "loss": 0.5869, "step": 12082 }, { "epoch": 0.77, "grad_norm": 1.6409394225904894, "learning_rate": 1.287097796431776e-06, "loss": 0.6702, "step": 12083 }, { "epoch": 0.77, "grad_norm": 1.865852233713396, "learning_rate": 1.2864036359831995e-06, "loss": 0.7282, "step": 12084 }, { "epoch": 0.77, "grad_norm": 1.3865946070472477, "learning_rate": 1.2857096351371917e-06, "loss": 0.6071, "step": 12085 }, { "epoch": 0.77, "grad_norm": 1.6169841859885659, "learning_rate": 1.2850157939235785e-06, "loss": 0.6711, "step": 12086 }, { "epoch": 0.77, "grad_norm": 1.768737367301067, "learning_rate": 1.2843221123721804e-06, "loss": 0.619, "step": 12087 }, { "epoch": 0.77, "grad_norm": 1.6915998173817977, "learning_rate": 1.2836285905128105e-06, "loss": 0.7605, "step": 12088 }, { "epoch": 0.77, "grad_norm": 1.5385164665309532, "learning_rate": 1.2829352283752728e-06, "loss": 0.6116, "step": 12089 }, { "epoch": 0.77, "grad_norm": 2.3079270426499807, "learning_rate": 1.2822420259893697e-06, "loss": 0.6148, "step": 12090 }, { "epoch": 0.77, "grad_norm": 1.683075191633886, "learning_rate": 1.2815489833848927e-06, "loss": 0.6648, "step": 12091 }, { "epoch": 0.77, "grad_norm": 2.130642935253526, "learning_rate": 1.2808561005916266e-06, "loss": 0.5849, "step": 12092 }, { "epoch": 0.77, "grad_norm": 1.6228908113997627, "learning_rate": 1.28016337763935e-06, "loss": 0.625, "step": 12093 }, { "epoch": 0.77, "grad_norm": 1.5299049245237293, "learning_rate": 1.2794708145578356e-06, "loss": 0.6372, "step": 12094 }, { "epoch": 0.77, "grad_norm": 1.515152573247586, "learning_rate": 1.278778411376847e-06, "loss": 0.6029, "step": 12095 }, { "epoch": 0.77, "grad_norm": 1.6285649674548612, "learning_rate": 1.278086168126142e-06, "loss": 0.6299, "step": 12096 }, { "epoch": 0.77, "grad_norm": 1.5178159651908785, "learning_rate": 1.2773940848354754e-06, "loss": 0.644, "step": 12097 }, { "epoch": 0.77, "grad_norm": 1.6284051138123505, "learning_rate": 1.2767021615345859e-06, "loss": 0.6163, "step": 12098 }, { "epoch": 0.77, "grad_norm": 1.8333106830658858, "learning_rate": 1.2760103982532152e-06, "loss": 0.6464, "step": 12099 }, { "epoch": 0.77, "grad_norm": 1.5608146962259886, "learning_rate": 1.2753187950210922e-06, "loss": 0.6716, "step": 12100 }, { "epoch": 0.77, "grad_norm": 1.4175232762580152, "learning_rate": 1.274627351867941e-06, "loss": 0.639, "step": 12101 }, { "epoch": 0.77, "grad_norm": 1.4169974413343374, "learning_rate": 1.2739360688234782e-06, "loss": 0.6748, "step": 12102 }, { "epoch": 0.77, "grad_norm": 1.6638263594571727, "learning_rate": 1.2732449459174134e-06, "loss": 0.6819, "step": 12103 }, { "epoch": 0.77, "grad_norm": 1.586280261644666, "learning_rate": 1.2725539831794498e-06, "loss": 0.641, "step": 12104 }, { "epoch": 0.77, "grad_norm": 1.4260797695074738, "learning_rate": 1.2718631806392823e-06, "loss": 0.6457, "step": 12105 }, { "epoch": 0.77, "grad_norm": 1.4881991986304157, "learning_rate": 1.2711725383266044e-06, "loss": 0.7035, "step": 12106 }, { "epoch": 0.77, "grad_norm": 1.5556475831586096, "learning_rate": 1.2704820562710923e-06, "loss": 0.6498, "step": 12107 }, { "epoch": 0.77, "grad_norm": 1.5394189856622864, "learning_rate": 1.2697917345024258e-06, "loss": 0.6563, "step": 12108 }, { "epoch": 0.78, "grad_norm": 1.1705162997574876, "learning_rate": 1.2691015730502732e-06, "loss": 0.7743, "step": 12109 }, { "epoch": 0.78, "grad_norm": 1.7168326134148206, "learning_rate": 1.2684115719442947e-06, "loss": 0.6552, "step": 12110 }, { "epoch": 0.78, "grad_norm": 2.0347843337171043, "learning_rate": 1.2677217312141455e-06, "loss": 0.6856, "step": 12111 }, { "epoch": 0.78, "grad_norm": 1.045597208309387, "learning_rate": 1.2670320508894724e-06, "loss": 0.5326, "step": 12112 }, { "epoch": 0.78, "grad_norm": 1.4828099020917997, "learning_rate": 1.2663425309999205e-06, "loss": 0.6341, "step": 12113 }, { "epoch": 0.78, "grad_norm": 2.5894784692959747, "learning_rate": 1.2656531715751185e-06, "loss": 0.6735, "step": 12114 }, { "epoch": 0.78, "grad_norm": 1.6989940568325865, "learning_rate": 1.2649639726446994e-06, "loss": 0.6677, "step": 12115 }, { "epoch": 0.78, "grad_norm": 1.7112769964130725, "learning_rate": 1.2642749342382772e-06, "loss": 0.7448, "step": 12116 }, { "epoch": 0.78, "grad_norm": 2.1508198944275034, "learning_rate": 1.2635860563854695e-06, "loss": 0.7554, "step": 12117 }, { "epoch": 0.78, "grad_norm": 1.826896768213303, "learning_rate": 1.2628973391158821e-06, "loss": 0.637, "step": 12118 }, { "epoch": 0.78, "grad_norm": 2.5060102484671622, "learning_rate": 1.2622087824591129e-06, "loss": 0.6567, "step": 12119 }, { "epoch": 0.78, "grad_norm": 1.379651156724633, "learning_rate": 1.261520386444759e-06, "loss": 0.594, "step": 12120 }, { "epoch": 0.78, "grad_norm": 1.5495477546195617, "learning_rate": 1.2608321511024007e-06, "loss": 0.6518, "step": 12121 }, { "epoch": 0.78, "grad_norm": 1.5242649419947136, "learning_rate": 1.2601440764616218e-06, "loss": 0.5955, "step": 12122 }, { "epoch": 0.78, "grad_norm": 1.5262617435096553, "learning_rate": 1.2594561625519891e-06, "loss": 0.5869, "step": 12123 }, { "epoch": 0.78, "grad_norm": 1.7322388327623193, "learning_rate": 1.2587684094030723e-06, "loss": 0.7147, "step": 12124 }, { "epoch": 0.78, "grad_norm": 1.8886648635445775, "learning_rate": 1.258080817044428e-06, "loss": 0.6186, "step": 12125 }, { "epoch": 0.78, "grad_norm": 1.7039720321621992, "learning_rate": 1.2573933855056053e-06, "loss": 0.6929, "step": 12126 }, { "epoch": 0.78, "grad_norm": 1.4593953328971094, "learning_rate": 1.256706114816154e-06, "loss": 0.684, "step": 12127 }, { "epoch": 0.78, "grad_norm": 1.4001790614055667, "learning_rate": 1.2560190050056054e-06, "loss": 0.6659, "step": 12128 }, { "epoch": 0.78, "grad_norm": 1.9749913918718134, "learning_rate": 1.2553320561034955e-06, "loss": 0.7329, "step": 12129 }, { "epoch": 0.78, "grad_norm": 1.7440479095569947, "learning_rate": 1.2546452681393417e-06, "loss": 0.6852, "step": 12130 }, { "epoch": 0.78, "grad_norm": 1.2884681138387972, "learning_rate": 1.2539586411426664e-06, "loss": 0.5842, "step": 12131 }, { "epoch": 0.78, "grad_norm": 1.7308446641595643, "learning_rate": 1.2532721751429765e-06, "loss": 0.6017, "step": 12132 }, { "epoch": 0.78, "grad_norm": 1.4944414627036338, "learning_rate": 1.2525858701697762e-06, "loss": 0.6376, "step": 12133 }, { "epoch": 0.78, "grad_norm": 1.4239042942060254, "learning_rate": 1.2518997262525605e-06, "loss": 0.594, "step": 12134 }, { "epoch": 0.78, "grad_norm": 1.575944277082364, "learning_rate": 1.2512137434208188e-06, "loss": 0.6487, "step": 12135 }, { "epoch": 0.78, "grad_norm": 2.1460126939619757, "learning_rate": 1.2505279217040327e-06, "loss": 0.6564, "step": 12136 }, { "epoch": 0.78, "grad_norm": 1.4727387499937474, "learning_rate": 1.2498422611316767e-06, "loss": 0.667, "step": 12137 }, { "epoch": 0.78, "grad_norm": 1.5102763424090107, "learning_rate": 1.2491567617332218e-06, "loss": 0.6862, "step": 12138 }, { "epoch": 0.78, "grad_norm": 1.588282533901749, "learning_rate": 1.2484714235381278e-06, "loss": 0.6199, "step": 12139 }, { "epoch": 0.78, "grad_norm": 1.6355112987321394, "learning_rate": 1.2477862465758484e-06, "loss": 0.6373, "step": 12140 }, { "epoch": 0.78, "grad_norm": 1.5919237852688644, "learning_rate": 1.2471012308758324e-06, "loss": 0.7264, "step": 12141 }, { "epoch": 0.78, "grad_norm": 1.454469379995369, "learning_rate": 1.2464163764675185e-06, "loss": 0.6059, "step": 12142 }, { "epoch": 0.78, "grad_norm": 1.5697106790358395, "learning_rate": 1.2457316833803424e-06, "loss": 0.6576, "step": 12143 }, { "epoch": 0.78, "grad_norm": 1.5880865034384788, "learning_rate": 1.245047151643729e-06, "loss": 0.7668, "step": 12144 }, { "epoch": 0.78, "grad_norm": 1.2240967442039035, "learning_rate": 1.244362781287099e-06, "loss": 0.6245, "step": 12145 }, { "epoch": 0.78, "grad_norm": 1.4722391640389885, "learning_rate": 1.2436785723398637e-06, "loss": 0.6935, "step": 12146 }, { "epoch": 0.78, "grad_norm": 1.6259971477286854, "learning_rate": 1.2429945248314317e-06, "loss": 0.6326, "step": 12147 }, { "epoch": 0.78, "grad_norm": 1.5492041307547608, "learning_rate": 1.2423106387912003e-06, "loss": 0.6252, "step": 12148 }, { "epoch": 0.78, "grad_norm": 1.2398316059066148, "learning_rate": 1.2416269142485615e-06, "loss": 0.6366, "step": 12149 }, { "epoch": 0.78, "grad_norm": 1.6213477594129366, "learning_rate": 1.240943351232901e-06, "loss": 0.5935, "step": 12150 }, { "epoch": 0.78, "grad_norm": 1.9211589370365687, "learning_rate": 1.2402599497735961e-06, "loss": 0.6826, "step": 12151 }, { "epoch": 0.78, "grad_norm": 1.3975448520701799, "learning_rate": 1.2395767099000183e-06, "loss": 0.5648, "step": 12152 }, { "epoch": 0.78, "grad_norm": 1.4916824497195165, "learning_rate": 1.2388936316415317e-06, "loss": 0.7347, "step": 12153 }, { "epoch": 0.78, "grad_norm": 1.5897497870273372, "learning_rate": 1.238210715027492e-06, "loss": 0.649, "step": 12154 }, { "epoch": 0.78, "grad_norm": 1.7041419253735397, "learning_rate": 1.237527960087253e-06, "loss": 0.6758, "step": 12155 }, { "epoch": 0.78, "grad_norm": 1.385087209207145, "learning_rate": 1.2368453668501562e-06, "loss": 0.4913, "step": 12156 }, { "epoch": 0.78, "grad_norm": 1.2132687509889148, "learning_rate": 1.2361629353455378e-06, "loss": 0.6021, "step": 12157 }, { "epoch": 0.78, "grad_norm": 1.8505371899107135, "learning_rate": 1.2354806656027274e-06, "loss": 0.5822, "step": 12158 }, { "epoch": 0.78, "grad_norm": 1.4104071096700441, "learning_rate": 1.234798557651048e-06, "loss": 0.6846, "step": 12159 }, { "epoch": 0.78, "grad_norm": 1.8237558721812999, "learning_rate": 1.234116611519815e-06, "loss": 0.6463, "step": 12160 }, { "epoch": 0.78, "grad_norm": 1.0352787366888232, "learning_rate": 1.233434827238335e-06, "loss": 0.5031, "step": 12161 }, { "epoch": 0.78, "grad_norm": 1.4194586788335772, "learning_rate": 1.232753204835914e-06, "loss": 0.6996, "step": 12162 }, { "epoch": 0.78, "grad_norm": 1.648217735002003, "learning_rate": 1.2320717443418422e-06, "loss": 0.697, "step": 12163 }, { "epoch": 0.78, "grad_norm": 1.5780207247393874, "learning_rate": 1.2313904457854104e-06, "loss": 0.5957, "step": 12164 }, { "epoch": 0.78, "grad_norm": 1.6146986011866722, "learning_rate": 1.2307093091958983e-06, "loss": 0.6343, "step": 12165 }, { "epoch": 0.78, "grad_norm": 1.474564485481857, "learning_rate": 1.2300283346025794e-06, "loss": 0.6616, "step": 12166 }, { "epoch": 0.78, "grad_norm": 1.6845782650033383, "learning_rate": 1.2293475220347212e-06, "loss": 0.7522, "step": 12167 }, { "epoch": 0.78, "grad_norm": 1.7462246142330993, "learning_rate": 1.2286668715215817e-06, "loss": 0.6812, "step": 12168 }, { "epoch": 0.78, "grad_norm": 1.7576066183994754, "learning_rate": 1.2279863830924183e-06, "loss": 0.5935, "step": 12169 }, { "epoch": 0.78, "grad_norm": 1.5771360878532528, "learning_rate": 1.2273060567764711e-06, "loss": 0.7008, "step": 12170 }, { "epoch": 0.78, "grad_norm": 1.6402320368112637, "learning_rate": 1.2266258926029851e-06, "loss": 0.6654, "step": 12171 }, { "epoch": 0.78, "grad_norm": 1.4743540180567014, "learning_rate": 1.225945890601186e-06, "loss": 0.6499, "step": 12172 }, { "epoch": 0.78, "grad_norm": 1.4360364489082553, "learning_rate": 1.2252660508003045e-06, "loss": 0.636, "step": 12173 }, { "epoch": 0.78, "grad_norm": 1.6053785660524746, "learning_rate": 1.2245863732295554e-06, "loss": 0.6609, "step": 12174 }, { "epoch": 0.78, "grad_norm": 1.5518037071327242, "learning_rate": 1.2239068579181497e-06, "loss": 0.6393, "step": 12175 }, { "epoch": 0.78, "grad_norm": 1.5542949279718785, "learning_rate": 1.2232275048952956e-06, "loss": 0.589, "step": 12176 }, { "epoch": 0.78, "grad_norm": 1.1338440722397798, "learning_rate": 1.222548314190184e-06, "loss": 0.5219, "step": 12177 }, { "epoch": 0.78, "grad_norm": 1.5114486060564467, "learning_rate": 1.2218692858320114e-06, "loss": 0.6622, "step": 12178 }, { "epoch": 0.78, "grad_norm": 1.629974474397587, "learning_rate": 1.2211904198499551e-06, "loss": 0.6157, "step": 12179 }, { "epoch": 0.78, "grad_norm": 1.9029240929975566, "learning_rate": 1.2205117162731956e-06, "loss": 0.7258, "step": 12180 }, { "epoch": 0.78, "grad_norm": 1.4738359195040336, "learning_rate": 1.2198331751309006e-06, "loss": 0.6917, "step": 12181 }, { "epoch": 0.78, "grad_norm": 1.4914121731967154, "learning_rate": 1.2191547964522326e-06, "loss": 0.7704, "step": 12182 }, { "epoch": 0.78, "grad_norm": 1.628613290720063, "learning_rate": 1.2184765802663468e-06, "loss": 0.7012, "step": 12183 }, { "epoch": 0.78, "grad_norm": 1.5345358478747024, "learning_rate": 1.21779852660239e-06, "loss": 0.6514, "step": 12184 }, { "epoch": 0.78, "grad_norm": 1.416245017043311, "learning_rate": 1.2171206354895081e-06, "loss": 0.7205, "step": 12185 }, { "epoch": 0.78, "grad_norm": 1.6678662094029557, "learning_rate": 1.2164429069568295e-06, "loss": 0.6163, "step": 12186 }, { "epoch": 0.78, "grad_norm": 1.4855900675651232, "learning_rate": 1.2157653410334875e-06, "loss": 0.6276, "step": 12187 }, { "epoch": 0.78, "grad_norm": 1.3525938235489867, "learning_rate": 1.2150879377485959e-06, "loss": 0.583, "step": 12188 }, { "epoch": 0.78, "grad_norm": 1.656854614830803, "learning_rate": 1.214410697131273e-06, "loss": 0.718, "step": 12189 }, { "epoch": 0.78, "grad_norm": 1.2863534547687103, "learning_rate": 1.213733619210624e-06, "loss": 0.7381, "step": 12190 }, { "epoch": 0.78, "grad_norm": 1.4840903900688445, "learning_rate": 1.2130567040157465e-06, "loss": 0.7386, "step": 12191 }, { "epoch": 0.78, "grad_norm": 1.4036944887781106, "learning_rate": 1.2123799515757372e-06, "loss": 0.6888, "step": 12192 }, { "epoch": 0.78, "grad_norm": 1.4844314876853466, "learning_rate": 1.2117033619196762e-06, "loss": 0.7135, "step": 12193 }, { "epoch": 0.78, "grad_norm": 1.4005095349056451, "learning_rate": 1.2110269350766469e-06, "loss": 0.6337, "step": 12194 }, { "epoch": 0.78, "grad_norm": 1.5058769589724033, "learning_rate": 1.2103506710757156e-06, "loss": 0.5678, "step": 12195 }, { "epoch": 0.78, "grad_norm": 1.6349952876338991, "learning_rate": 1.2096745699459505e-06, "loss": 0.7498, "step": 12196 }, { "epoch": 0.78, "grad_norm": 1.4802885112814013, "learning_rate": 1.2089986317164075e-06, "loss": 0.7543, "step": 12197 }, { "epoch": 0.78, "grad_norm": 1.4730783739941562, "learning_rate": 1.208322856416138e-06, "loss": 0.6235, "step": 12198 }, { "epoch": 0.78, "grad_norm": 1.6026555750205482, "learning_rate": 1.2076472440741844e-06, "loss": 0.7085, "step": 12199 }, { "epoch": 0.78, "grad_norm": 1.5272414375688108, "learning_rate": 1.206971794719582e-06, "loss": 0.6501, "step": 12200 }, { "epoch": 0.78, "grad_norm": 1.6236326518274873, "learning_rate": 1.2062965083813643e-06, "loss": 0.6791, "step": 12201 }, { "epoch": 0.78, "grad_norm": 1.4466585459487682, "learning_rate": 1.2056213850885485e-06, "loss": 0.6237, "step": 12202 }, { "epoch": 0.78, "grad_norm": 1.5605295094107723, "learning_rate": 1.2049464248701537e-06, "loss": 0.6813, "step": 12203 }, { "epoch": 0.78, "grad_norm": 1.6553672898584375, "learning_rate": 1.204271627755187e-06, "loss": 0.6352, "step": 12204 }, { "epoch": 0.78, "grad_norm": 1.7223395597331579, "learning_rate": 1.20359699377265e-06, "loss": 0.6202, "step": 12205 }, { "epoch": 0.78, "grad_norm": 1.6282004518450846, "learning_rate": 1.2029225229515368e-06, "loss": 0.6109, "step": 12206 }, { "epoch": 0.78, "grad_norm": 1.5359627720341311, "learning_rate": 1.202248215320835e-06, "loss": 0.7002, "step": 12207 }, { "epoch": 0.78, "grad_norm": 1.5206109701042438, "learning_rate": 1.201574070909524e-06, "loss": 0.7216, "step": 12208 }, { "epoch": 0.78, "grad_norm": 1.4026742955550695, "learning_rate": 1.2009000897465782e-06, "loss": 0.6957, "step": 12209 }, { "epoch": 0.78, "grad_norm": 1.6569713819266947, "learning_rate": 1.2002262718609625e-06, "loss": 0.6244, "step": 12210 }, { "epoch": 0.78, "grad_norm": 1.5056582614666894, "learning_rate": 1.1995526172816385e-06, "loss": 0.663, "step": 12211 }, { "epoch": 0.78, "grad_norm": 1.3460864703932747, "learning_rate": 1.198879126037557e-06, "loss": 0.6911, "step": 12212 }, { "epoch": 0.78, "grad_norm": 1.456314866097053, "learning_rate": 1.1982057981576634e-06, "loss": 0.6299, "step": 12213 }, { "epoch": 0.78, "grad_norm": 1.5216606864220399, "learning_rate": 1.197532633670896e-06, "loss": 0.704, "step": 12214 }, { "epoch": 0.78, "grad_norm": 1.3737681129505204, "learning_rate": 1.196859632606186e-06, "loss": 0.5907, "step": 12215 }, { "epoch": 0.78, "grad_norm": 1.2039363701772374, "learning_rate": 1.196186794992457e-06, "loss": 0.5136, "step": 12216 }, { "epoch": 0.78, "grad_norm": 1.5229990846388073, "learning_rate": 1.1955141208586257e-06, "loss": 0.6988, "step": 12217 }, { "epoch": 0.78, "grad_norm": 1.513008484663378, "learning_rate": 1.1948416102336063e-06, "loss": 0.5606, "step": 12218 }, { "epoch": 0.78, "grad_norm": 1.4445644626743683, "learning_rate": 1.1941692631462954e-06, "loss": 0.7221, "step": 12219 }, { "epoch": 0.78, "grad_norm": 1.6121707678132948, "learning_rate": 1.193497079625594e-06, "loss": 0.6618, "step": 12220 }, { "epoch": 0.78, "grad_norm": 1.5389178719471457, "learning_rate": 1.1928250597003893e-06, "loss": 0.6815, "step": 12221 }, { "epoch": 0.78, "grad_norm": 1.6445777077228179, "learning_rate": 1.1921532033995636e-06, "loss": 0.6969, "step": 12222 }, { "epoch": 0.78, "grad_norm": 1.5557507391591403, "learning_rate": 1.1914815107519922e-06, "loss": 0.7123, "step": 12223 }, { "epoch": 0.78, "grad_norm": 1.371162313198774, "learning_rate": 1.1908099817865427e-06, "loss": 0.6616, "step": 12224 }, { "epoch": 0.78, "grad_norm": 1.9336384264514583, "learning_rate": 1.1901386165320755e-06, "loss": 0.7044, "step": 12225 }, { "epoch": 0.78, "grad_norm": 1.8853025908143992, "learning_rate": 1.189467415017444e-06, "loss": 0.5925, "step": 12226 }, { "epoch": 0.78, "grad_norm": 1.7147760689913687, "learning_rate": 1.1887963772714982e-06, "loss": 0.7288, "step": 12227 }, { "epoch": 0.78, "grad_norm": 2.5521229502985747, "learning_rate": 1.1881255033230732e-06, "loss": 0.7026, "step": 12228 }, { "epoch": 0.78, "grad_norm": 1.57666198199198, "learning_rate": 1.1874547932010054e-06, "loss": 0.6349, "step": 12229 }, { "epoch": 0.78, "grad_norm": 1.4730717153964703, "learning_rate": 1.18678424693412e-06, "loss": 0.663, "step": 12230 }, { "epoch": 0.78, "grad_norm": 1.4863687899475337, "learning_rate": 1.1861138645512343e-06, "loss": 0.6447, "step": 12231 }, { "epoch": 0.78, "grad_norm": 1.2944636470419946, "learning_rate": 1.1854436460811608e-06, "loss": 0.7061, "step": 12232 }, { "epoch": 0.78, "grad_norm": 1.5570798159874855, "learning_rate": 1.1847735915527026e-06, "loss": 0.6451, "step": 12233 }, { "epoch": 0.78, "grad_norm": 1.7427567037248561, "learning_rate": 1.1841037009946616e-06, "loss": 0.761, "step": 12234 }, { "epoch": 0.78, "grad_norm": 1.6019329420492108, "learning_rate": 1.183433974435822e-06, "loss": 0.6499, "step": 12235 }, { "epoch": 0.78, "grad_norm": 1.7230403230146096, "learning_rate": 1.1827644119049735e-06, "loss": 0.6974, "step": 12236 }, { "epoch": 0.78, "grad_norm": 1.9546634820731768, "learning_rate": 1.1820950134308862e-06, "loss": 0.7771, "step": 12237 }, { "epoch": 0.78, "grad_norm": 1.3463687764874355, "learning_rate": 1.1814257790423345e-06, "loss": 0.6017, "step": 12238 }, { "epoch": 0.78, "grad_norm": 1.4379242615798677, "learning_rate": 1.1807567087680787e-06, "loss": 0.6116, "step": 12239 }, { "epoch": 0.78, "grad_norm": 2.458018307221166, "learning_rate": 1.1800878026368734e-06, "loss": 0.6035, "step": 12240 }, { "epoch": 0.78, "grad_norm": 2.578301347347212, "learning_rate": 1.1794190606774696e-06, "loss": 0.7219, "step": 12241 }, { "epoch": 0.78, "grad_norm": 1.449859256706535, "learning_rate": 1.1787504829186043e-06, "loss": 0.5836, "step": 12242 }, { "epoch": 0.78, "grad_norm": 1.0924377399047598, "learning_rate": 1.178082069389016e-06, "loss": 0.5685, "step": 12243 }, { "epoch": 0.78, "grad_norm": 1.6211167162753912, "learning_rate": 1.1774138201174268e-06, "loss": 0.6442, "step": 12244 }, { "epoch": 0.78, "grad_norm": 1.4698676631260923, "learning_rate": 1.1767457351325605e-06, "loss": 0.5291, "step": 12245 }, { "epoch": 0.78, "grad_norm": 1.4824988135480226, "learning_rate": 1.176077814463129e-06, "loss": 0.6072, "step": 12246 }, { "epoch": 0.78, "grad_norm": 1.6365719175413918, "learning_rate": 1.1754100581378365e-06, "loss": 0.7009, "step": 12247 }, { "epoch": 0.78, "grad_norm": 1.12814435869284, "learning_rate": 1.1747424661853857e-06, "loss": 0.4663, "step": 12248 }, { "epoch": 0.78, "grad_norm": 1.3587183583626326, "learning_rate": 1.174075038634463e-06, "loss": 0.6241, "step": 12249 }, { "epoch": 0.78, "grad_norm": 1.5541624964847098, "learning_rate": 1.1734077755137585e-06, "loss": 0.6112, "step": 12250 }, { "epoch": 0.78, "grad_norm": 1.4474032683949771, "learning_rate": 1.1727406768519444e-06, "loss": 0.7062, "step": 12251 }, { "epoch": 0.78, "grad_norm": 1.5265470777782546, "learning_rate": 1.1720737426776952e-06, "loss": 0.6741, "step": 12252 }, { "epoch": 0.78, "grad_norm": 1.4754000768535305, "learning_rate": 1.171406973019673e-06, "loss": 0.7376, "step": 12253 }, { "epoch": 0.78, "grad_norm": 1.4279368707271491, "learning_rate": 1.1707403679065337e-06, "loss": 0.6084, "step": 12254 }, { "epoch": 0.78, "grad_norm": 1.9097012889844434, "learning_rate": 1.1700739273669277e-06, "loss": 0.7256, "step": 12255 }, { "epoch": 0.78, "grad_norm": 1.266113198653476, "learning_rate": 1.1694076514294945e-06, "loss": 0.6603, "step": 12256 }, { "epoch": 0.78, "grad_norm": 1.0794979074001867, "learning_rate": 1.168741540122874e-06, "loss": 0.6629, "step": 12257 }, { "epoch": 0.78, "grad_norm": 1.7022931967774937, "learning_rate": 1.1680755934756894e-06, "loss": 0.7446, "step": 12258 }, { "epoch": 0.78, "grad_norm": 1.3823611953066457, "learning_rate": 1.1674098115165645e-06, "loss": 0.5989, "step": 12259 }, { "epoch": 0.78, "grad_norm": 1.4737362338072197, "learning_rate": 1.1667441942741132e-06, "loss": 0.7402, "step": 12260 }, { "epoch": 0.78, "grad_norm": 1.403833271036362, "learning_rate": 1.166078741776941e-06, "loss": 0.6454, "step": 12261 }, { "epoch": 0.78, "grad_norm": 1.5435970924484528, "learning_rate": 1.1654134540536487e-06, "loss": 0.6579, "step": 12262 }, { "epoch": 0.78, "grad_norm": 2.8061170737669774, "learning_rate": 1.1647483311328285e-06, "loss": 0.5666, "step": 12263 }, { "epoch": 0.78, "grad_norm": 1.8724411239729246, "learning_rate": 1.1640833730430663e-06, "loss": 0.5769, "step": 12264 }, { "epoch": 0.79, "grad_norm": 1.591604472618925, "learning_rate": 1.1634185798129383e-06, "loss": 0.7014, "step": 12265 }, { "epoch": 0.79, "grad_norm": 1.375471163453167, "learning_rate": 1.1627539514710213e-06, "loss": 0.5922, "step": 12266 }, { "epoch": 0.79, "grad_norm": 1.4402216442383966, "learning_rate": 1.1620894880458732e-06, "loss": 0.7005, "step": 12267 }, { "epoch": 0.79, "grad_norm": 1.0334666423824381, "learning_rate": 1.161425189566056e-06, "loss": 0.6463, "step": 12268 }, { "epoch": 0.79, "grad_norm": 1.6456789817544895, "learning_rate": 1.1607610560601179e-06, "loss": 0.6943, "step": 12269 }, { "epoch": 0.79, "grad_norm": 1.737335067726114, "learning_rate": 1.1600970875566025e-06, "loss": 0.6729, "step": 12270 }, { "epoch": 0.79, "grad_norm": 1.718683460478613, "learning_rate": 1.1594332840840455e-06, "loss": 0.5941, "step": 12271 }, { "epoch": 0.79, "grad_norm": 1.4973069518708386, "learning_rate": 1.1587696456709758e-06, "loss": 0.72, "step": 12272 }, { "epoch": 0.79, "grad_norm": 1.564776485647326, "learning_rate": 1.1581061723459153e-06, "loss": 0.6341, "step": 12273 }, { "epoch": 0.79, "grad_norm": 1.304625756903566, "learning_rate": 1.1574428641373769e-06, "loss": 0.5949, "step": 12274 }, { "epoch": 0.79, "grad_norm": 1.5104771359084896, "learning_rate": 1.1567797210738713e-06, "loss": 0.6564, "step": 12275 }, { "epoch": 0.79, "grad_norm": 1.585052898040926, "learning_rate": 1.1561167431838977e-06, "loss": 0.5793, "step": 12276 }, { "epoch": 0.79, "grad_norm": 1.6185135376020259, "learning_rate": 1.1554539304959494e-06, "loss": 0.7084, "step": 12277 }, { "epoch": 0.79, "grad_norm": 1.4400552201682186, "learning_rate": 1.1547912830385127e-06, "loss": 0.6094, "step": 12278 }, { "epoch": 0.79, "grad_norm": 1.0347310295753094, "learning_rate": 1.1541288008400665e-06, "loss": 0.5731, "step": 12279 }, { "epoch": 0.79, "grad_norm": 1.63651079305548, "learning_rate": 1.1534664839290832e-06, "loss": 0.7168, "step": 12280 }, { "epoch": 0.79, "grad_norm": 1.1132695244681916, "learning_rate": 1.1528043323340281e-06, "loss": 0.5571, "step": 12281 }, { "epoch": 0.79, "grad_norm": 1.4563922327654903, "learning_rate": 1.152142346083357e-06, "loss": 0.605, "step": 12282 }, { "epoch": 0.79, "grad_norm": 0.973459708834002, "learning_rate": 1.151480525205525e-06, "loss": 0.5939, "step": 12283 }, { "epoch": 0.79, "grad_norm": 1.5160577376388134, "learning_rate": 1.1508188697289707e-06, "loss": 0.6742, "step": 12284 }, { "epoch": 0.79, "grad_norm": 1.0779858375884817, "learning_rate": 1.1501573796821348e-06, "loss": 0.6967, "step": 12285 }, { "epoch": 0.79, "grad_norm": 1.5277932318578367, "learning_rate": 1.1494960550934448e-06, "loss": 0.642, "step": 12286 }, { "epoch": 0.79, "grad_norm": 1.7355981119425128, "learning_rate": 1.148834895991323e-06, "loss": 0.5829, "step": 12287 }, { "epoch": 0.79, "grad_norm": 1.6427621449910024, "learning_rate": 1.1481739024041856e-06, "loss": 0.7394, "step": 12288 }, { "epoch": 0.79, "grad_norm": 1.448749720212849, "learning_rate": 1.147513074360438e-06, "loss": 0.608, "step": 12289 }, { "epoch": 0.79, "grad_norm": 1.3812947186116173, "learning_rate": 1.1468524118884866e-06, "loss": 0.6538, "step": 12290 }, { "epoch": 0.79, "grad_norm": 1.7089514962679873, "learning_rate": 1.1461919150167189e-06, "loss": 0.6665, "step": 12291 }, { "epoch": 0.79, "grad_norm": 1.3825961334824912, "learning_rate": 1.1455315837735276e-06, "loss": 0.6243, "step": 12292 }, { "epoch": 0.79, "grad_norm": 1.5343970224172263, "learning_rate": 1.1448714181872867e-06, "loss": 0.765, "step": 12293 }, { "epoch": 0.79, "grad_norm": 1.6008056122723457, "learning_rate": 1.1442114182863728e-06, "loss": 0.8277, "step": 12294 }, { "epoch": 0.79, "grad_norm": 1.529083612795108, "learning_rate": 1.1435515840991502e-06, "loss": 0.7084, "step": 12295 }, { "epoch": 0.79, "grad_norm": 1.547030168113447, "learning_rate": 1.1428919156539769e-06, "loss": 0.7357, "step": 12296 }, { "epoch": 0.79, "grad_norm": 1.1345816185527022, "learning_rate": 1.1422324129792039e-06, "loss": 0.6604, "step": 12297 }, { "epoch": 0.79, "grad_norm": 1.0925912726606861, "learning_rate": 1.1415730761031745e-06, "loss": 0.6511, "step": 12298 }, { "epoch": 0.79, "grad_norm": 1.375832379485128, "learning_rate": 1.1409139050542295e-06, "loss": 0.6429, "step": 12299 }, { "epoch": 0.79, "grad_norm": 1.445042925393247, "learning_rate": 1.1402548998606927e-06, "loss": 0.6765, "step": 12300 }, { "epoch": 0.79, "grad_norm": 1.5513383275055228, "learning_rate": 1.1395960605508916e-06, "loss": 0.7593, "step": 12301 }, { "epoch": 0.79, "grad_norm": 1.75627834809694, "learning_rate": 1.1389373871531395e-06, "loss": 0.6468, "step": 12302 }, { "epoch": 0.79, "grad_norm": 1.2318852952497141, "learning_rate": 1.1382788796957456e-06, "loss": 0.5984, "step": 12303 }, { "epoch": 0.79, "grad_norm": 1.6230392514044247, "learning_rate": 1.1376205382070105e-06, "loss": 0.6355, "step": 12304 }, { "epoch": 0.79, "grad_norm": 1.9422539782357096, "learning_rate": 1.1369623627152276e-06, "loss": 0.6006, "step": 12305 }, { "epoch": 0.79, "grad_norm": 1.635367201883613, "learning_rate": 1.1363043532486879e-06, "loss": 0.7046, "step": 12306 }, { "epoch": 0.79, "grad_norm": 1.64221489985101, "learning_rate": 1.1356465098356656e-06, "loss": 0.6528, "step": 12307 }, { "epoch": 0.79, "grad_norm": 1.5301118120638253, "learning_rate": 1.1349888325044383e-06, "loss": 0.6559, "step": 12308 }, { "epoch": 0.79, "grad_norm": 1.0178591185771506, "learning_rate": 1.1343313212832674e-06, "loss": 0.6413, "step": 12309 }, { "epoch": 0.79, "grad_norm": 1.1331628034051806, "learning_rate": 1.1336739762004144e-06, "loss": 0.752, "step": 12310 }, { "epoch": 0.79, "grad_norm": 1.6330932089215524, "learning_rate": 1.13301679728413e-06, "loss": 0.6701, "step": 12311 }, { "epoch": 0.79, "grad_norm": 1.6611975605393186, "learning_rate": 1.1323597845626566e-06, "loss": 0.675, "step": 12312 }, { "epoch": 0.79, "grad_norm": 1.7258483012188934, "learning_rate": 1.1317029380642353e-06, "loss": 0.6877, "step": 12313 }, { "epoch": 0.79, "grad_norm": 1.7006298339110582, "learning_rate": 1.1310462578170906e-06, "loss": 0.6116, "step": 12314 }, { "epoch": 0.79, "grad_norm": 1.6056560150703731, "learning_rate": 1.1303897438494503e-06, "loss": 0.7035, "step": 12315 }, { "epoch": 0.79, "grad_norm": 1.054134601882692, "learning_rate": 1.129733396189525e-06, "loss": 0.6486, "step": 12316 }, { "epoch": 0.79, "grad_norm": 2.048178632316979, "learning_rate": 1.1290772148655278e-06, "loss": 0.7357, "step": 12317 }, { "epoch": 0.79, "grad_norm": 1.7746355817520914, "learning_rate": 1.1284211999056572e-06, "loss": 0.6645, "step": 12318 }, { "epoch": 0.79, "grad_norm": 1.4591694773230943, "learning_rate": 1.1277653513381083e-06, "loss": 0.6388, "step": 12319 }, { "epoch": 0.79, "grad_norm": 1.6368577437257705, "learning_rate": 1.1271096691910682e-06, "loss": 0.7146, "step": 12320 }, { "epoch": 0.79, "grad_norm": 1.5399285904541717, "learning_rate": 1.1264541534927148e-06, "loss": 0.6534, "step": 12321 }, { "epoch": 0.79, "grad_norm": 1.6989207200044592, "learning_rate": 1.1257988042712254e-06, "loss": 0.6057, "step": 12322 }, { "epoch": 0.79, "grad_norm": 1.6253620882693032, "learning_rate": 1.1251436215547596e-06, "loss": 0.6401, "step": 12323 }, { "epoch": 0.79, "grad_norm": 1.6841802213882544, "learning_rate": 1.1244886053714803e-06, "loss": 0.5679, "step": 12324 }, { "epoch": 0.79, "grad_norm": 2.4886514485974254, "learning_rate": 1.1238337557495372e-06, "loss": 0.617, "step": 12325 }, { "epoch": 0.79, "grad_norm": 1.8780960848683563, "learning_rate": 1.1231790727170744e-06, "loss": 0.7272, "step": 12326 }, { "epoch": 0.79, "grad_norm": 1.6337323768421528, "learning_rate": 1.1225245563022285e-06, "loss": 0.6505, "step": 12327 }, { "epoch": 0.79, "grad_norm": 1.2004751581123831, "learning_rate": 1.1218702065331295e-06, "loss": 0.7362, "step": 12328 }, { "epoch": 0.79, "grad_norm": 1.4482226696157952, "learning_rate": 1.1212160234378999e-06, "loss": 0.604, "step": 12329 }, { "epoch": 0.79, "grad_norm": 1.5897128873508843, "learning_rate": 1.1205620070446537e-06, "loss": 0.6559, "step": 12330 }, { "epoch": 0.79, "grad_norm": 1.1323608185399505, "learning_rate": 1.1199081573815023e-06, "loss": 0.6919, "step": 12331 }, { "epoch": 0.79, "grad_norm": 1.1144409260089045, "learning_rate": 1.1192544744765448e-06, "loss": 0.669, "step": 12332 }, { "epoch": 0.79, "grad_norm": 1.5558479784528778, "learning_rate": 1.1186009583578761e-06, "loss": 0.6409, "step": 12333 }, { "epoch": 0.79, "grad_norm": 1.5727978709026105, "learning_rate": 1.1179476090535818e-06, "loss": 0.6156, "step": 12334 }, { "epoch": 0.79, "grad_norm": 1.5189378654472407, "learning_rate": 1.1172944265917419e-06, "loss": 0.6836, "step": 12335 }, { "epoch": 0.79, "grad_norm": 1.5414782302422503, "learning_rate": 1.1166414110004286e-06, "loss": 0.6337, "step": 12336 }, { "epoch": 0.79, "grad_norm": 1.749726554933897, "learning_rate": 1.1159885623077076e-06, "loss": 0.698, "step": 12337 }, { "epoch": 0.79, "grad_norm": 1.5704977173479555, "learning_rate": 1.115335880541637e-06, "loss": 0.6248, "step": 12338 }, { "epoch": 0.79, "grad_norm": 1.8575041826972523, "learning_rate": 1.1146833657302659e-06, "loss": 0.6421, "step": 12339 }, { "epoch": 0.79, "grad_norm": 1.187132425248739, "learning_rate": 1.1140310179016412e-06, "loss": 0.7734, "step": 12340 }, { "epoch": 0.79, "grad_norm": 1.1724125715307492, "learning_rate": 1.1133788370837972e-06, "loss": 0.5774, "step": 12341 }, { "epoch": 0.79, "grad_norm": 1.3464992643293723, "learning_rate": 1.1127268233047645e-06, "loss": 0.6354, "step": 12342 }, { "epoch": 0.79, "grad_norm": 1.3866242643622233, "learning_rate": 1.1120749765925643e-06, "loss": 0.6618, "step": 12343 }, { "epoch": 0.79, "grad_norm": 1.676756662069165, "learning_rate": 1.111423296975212e-06, "loss": 0.7147, "step": 12344 }, { "epoch": 0.79, "grad_norm": 1.1615689871935324, "learning_rate": 1.1107717844807153e-06, "loss": 0.7331, "step": 12345 }, { "epoch": 0.79, "grad_norm": 1.4081993526662198, "learning_rate": 1.110120439137075e-06, "loss": 0.6335, "step": 12346 }, { "epoch": 0.79, "grad_norm": 1.087870125740669, "learning_rate": 1.1094692609722829e-06, "loss": 0.7252, "step": 12347 }, { "epoch": 0.79, "grad_norm": 1.253174012533749, "learning_rate": 1.1088182500143286e-06, "loss": 0.7216, "step": 12348 }, { "epoch": 0.79, "grad_norm": 1.6481629761892673, "learning_rate": 1.108167406291189e-06, "loss": 0.6295, "step": 12349 }, { "epoch": 0.79, "grad_norm": 1.6757002298844206, "learning_rate": 1.1075167298308364e-06, "loss": 0.7188, "step": 12350 }, { "epoch": 0.79, "grad_norm": 1.5646693835626193, "learning_rate": 1.1068662206612363e-06, "loss": 0.623, "step": 12351 }, { "epoch": 0.79, "grad_norm": 1.6382979898244574, "learning_rate": 1.1062158788103444e-06, "loss": 0.6971, "step": 12352 }, { "epoch": 0.79, "grad_norm": 1.3658196915167886, "learning_rate": 1.1055657043061124e-06, "loss": 0.7115, "step": 12353 }, { "epoch": 0.79, "grad_norm": 1.3578641014258779, "learning_rate": 1.104915697176482e-06, "loss": 0.7355, "step": 12354 }, { "epoch": 0.79, "grad_norm": 1.6499275136386469, "learning_rate": 1.104265857449393e-06, "loss": 0.6401, "step": 12355 }, { "epoch": 0.79, "grad_norm": 1.8177898281463036, "learning_rate": 1.1036161851527682e-06, "loss": 0.713, "step": 12356 }, { "epoch": 0.79, "grad_norm": 1.4796716796763003, "learning_rate": 1.1029666803145356e-06, "loss": 0.603, "step": 12357 }, { "epoch": 0.79, "grad_norm": 1.4424370529614645, "learning_rate": 1.1023173429626032e-06, "loss": 0.6632, "step": 12358 }, { "epoch": 0.79, "grad_norm": 1.6055728719589406, "learning_rate": 1.101668173124883e-06, "loss": 0.6015, "step": 12359 }, { "epoch": 0.79, "grad_norm": 1.7312484107641886, "learning_rate": 1.1010191708292728e-06, "loss": 0.7314, "step": 12360 }, { "epoch": 0.79, "grad_norm": 1.48876089750945, "learning_rate": 1.1003703361036644e-06, "loss": 0.6983, "step": 12361 }, { "epoch": 0.79, "grad_norm": 1.8844869159290052, "learning_rate": 1.0997216689759472e-06, "loss": 0.6604, "step": 12362 }, { "epoch": 0.79, "grad_norm": 1.7562701622339523, "learning_rate": 1.0990731694739947e-06, "loss": 0.7029, "step": 12363 }, { "epoch": 0.79, "grad_norm": 2.3441701282883205, "learning_rate": 1.0984248376256835e-06, "loss": 0.6604, "step": 12364 }, { "epoch": 0.79, "grad_norm": 1.4357530291691265, "learning_rate": 1.0977766734588707e-06, "loss": 0.6924, "step": 12365 }, { "epoch": 0.79, "grad_norm": 1.6087140790043586, "learning_rate": 1.097128677001419e-06, "loss": 0.6938, "step": 12366 }, { "epoch": 0.79, "grad_norm": 1.684069052510491, "learning_rate": 1.0964808482811751e-06, "loss": 0.6258, "step": 12367 }, { "epoch": 0.79, "grad_norm": 1.5637213148240268, "learning_rate": 1.0958331873259808e-06, "loss": 0.7045, "step": 12368 }, { "epoch": 0.79, "grad_norm": 1.815316122772254, "learning_rate": 1.0951856941636752e-06, "loss": 0.6621, "step": 12369 }, { "epoch": 0.79, "grad_norm": 1.703158397355091, "learning_rate": 1.0945383688220807e-06, "loss": 0.7067, "step": 12370 }, { "epoch": 0.79, "grad_norm": 1.4726465305557321, "learning_rate": 1.093891211329023e-06, "loss": 0.5966, "step": 12371 }, { "epoch": 0.79, "grad_norm": 1.7943335528725404, "learning_rate": 1.0932442217123106e-06, "loss": 0.6766, "step": 12372 }, { "epoch": 0.79, "grad_norm": 1.7077061034143974, "learning_rate": 1.0925973999997535e-06, "loss": 0.6965, "step": 12373 }, { "epoch": 0.79, "grad_norm": 1.5381668297090736, "learning_rate": 1.0919507462191498e-06, "loss": 0.6163, "step": 12374 }, { "epoch": 0.79, "grad_norm": 1.8196232425448027, "learning_rate": 1.091304260398291e-06, "loss": 0.6606, "step": 12375 }, { "epoch": 0.79, "grad_norm": 1.4333125015579022, "learning_rate": 1.0906579425649622e-06, "loss": 0.6669, "step": 12376 }, { "epoch": 0.79, "grad_norm": 1.733492515338576, "learning_rate": 1.0900117927469384e-06, "loss": 0.7034, "step": 12377 }, { "epoch": 0.79, "grad_norm": 1.5293907495837655, "learning_rate": 1.0893658109719946e-06, "loss": 0.6391, "step": 12378 }, { "epoch": 0.79, "grad_norm": 1.23515361864763, "learning_rate": 1.088719997267888e-06, "loss": 0.6625, "step": 12379 }, { "epoch": 0.79, "grad_norm": 1.4898068526763855, "learning_rate": 1.0880743516623804e-06, "loss": 0.6717, "step": 12380 }, { "epoch": 0.79, "grad_norm": 1.5266163939866202, "learning_rate": 1.087428874183214e-06, "loss": 0.6434, "step": 12381 }, { "epoch": 0.79, "grad_norm": 1.6026232139863539, "learning_rate": 1.0867835648581344e-06, "loss": 0.6465, "step": 12382 }, { "epoch": 0.79, "grad_norm": 1.5597526970800302, "learning_rate": 1.0861384237148749e-06, "loss": 0.6855, "step": 12383 }, { "epoch": 0.79, "grad_norm": 1.440064690571713, "learning_rate": 1.0854934507811609e-06, "loss": 0.652, "step": 12384 }, { "epoch": 0.79, "grad_norm": 1.4030619700917637, "learning_rate": 1.0848486460847135e-06, "loss": 0.6569, "step": 12385 }, { "epoch": 0.79, "grad_norm": 1.6133675405482346, "learning_rate": 1.084204009653243e-06, "loss": 0.5832, "step": 12386 }, { "epoch": 0.79, "grad_norm": 1.307600019576034, "learning_rate": 1.083559541514459e-06, "loss": 0.601, "step": 12387 }, { "epoch": 0.79, "grad_norm": 1.2417321077127053, "learning_rate": 1.082915241696053e-06, "loss": 0.6672, "step": 12388 }, { "epoch": 0.79, "grad_norm": 1.5859027889624053, "learning_rate": 1.0822711102257205e-06, "loss": 0.6645, "step": 12389 }, { "epoch": 0.79, "grad_norm": 1.0702516481227584, "learning_rate": 1.0816271471311434e-06, "loss": 0.6335, "step": 12390 }, { "epoch": 0.79, "grad_norm": 1.5585575507604308, "learning_rate": 1.0809833524399981e-06, "loss": 0.6812, "step": 12391 }, { "epoch": 0.79, "grad_norm": 1.389996417904735, "learning_rate": 1.0803397261799536e-06, "loss": 0.5991, "step": 12392 }, { "epoch": 0.79, "grad_norm": 1.8378099882275283, "learning_rate": 1.079696268378671e-06, "loss": 0.6874, "step": 12393 }, { "epoch": 0.79, "grad_norm": 1.5709482566594701, "learning_rate": 1.0790529790638048e-06, "loss": 0.6869, "step": 12394 }, { "epoch": 0.79, "grad_norm": 1.7456938225246967, "learning_rate": 1.0784098582630015e-06, "loss": 0.6393, "step": 12395 }, { "epoch": 0.79, "grad_norm": 1.7432517518756034, "learning_rate": 1.077766906003903e-06, "loss": 0.6974, "step": 12396 }, { "epoch": 0.79, "grad_norm": 1.5370363072210231, "learning_rate": 1.0771241223141415e-06, "loss": 0.6937, "step": 12397 }, { "epoch": 0.79, "grad_norm": 1.5348472345958677, "learning_rate": 1.0764815072213425e-06, "loss": 0.7522, "step": 12398 }, { "epoch": 0.79, "grad_norm": 1.7002563732801725, "learning_rate": 1.0758390607531232e-06, "loss": 0.645, "step": 12399 }, { "epoch": 0.79, "grad_norm": 1.8057680462742671, "learning_rate": 1.0751967829370957e-06, "loss": 0.8037, "step": 12400 }, { "epoch": 0.79, "grad_norm": 1.4958322723299513, "learning_rate": 1.0745546738008627e-06, "loss": 0.6863, "step": 12401 }, { "epoch": 0.79, "grad_norm": 5.119529159250173, "learning_rate": 1.0739127333720223e-06, "loss": 0.6789, "step": 12402 }, { "epoch": 0.79, "grad_norm": 1.6747363410053613, "learning_rate": 1.0732709616781606e-06, "loss": 0.5968, "step": 12403 }, { "epoch": 0.79, "grad_norm": 1.5932560487862384, "learning_rate": 1.0726293587468634e-06, "loss": 0.6326, "step": 12404 }, { "epoch": 0.79, "grad_norm": 1.6611088462834853, "learning_rate": 1.071987924605704e-06, "loss": 0.6408, "step": 12405 }, { "epoch": 0.79, "grad_norm": 1.014583149143979, "learning_rate": 1.0713466592822498e-06, "loss": 0.6563, "step": 12406 }, { "epoch": 0.79, "grad_norm": 1.633565948722996, "learning_rate": 1.0707055628040602e-06, "loss": 0.6886, "step": 12407 }, { "epoch": 0.79, "grad_norm": 1.6952691877530235, "learning_rate": 1.0700646351986899e-06, "loss": 0.6339, "step": 12408 }, { "epoch": 0.79, "grad_norm": 1.722966164718088, "learning_rate": 1.0694238764936827e-06, "loss": 0.607, "step": 12409 }, { "epoch": 0.79, "grad_norm": 1.5518525586393166, "learning_rate": 1.0687832867165771e-06, "loss": 0.6586, "step": 12410 }, { "epoch": 0.79, "grad_norm": 1.546307381546482, "learning_rate": 1.0681428658949083e-06, "loss": 0.8459, "step": 12411 }, { "epoch": 0.79, "grad_norm": 1.7286268217973306, "learning_rate": 1.067502614056194e-06, "loss": 0.6629, "step": 12412 }, { "epoch": 0.79, "grad_norm": 2.0644011760067853, "learning_rate": 1.0668625312279552e-06, "loss": 0.6828, "step": 12413 }, { "epoch": 0.79, "grad_norm": 1.0783054064088768, "learning_rate": 1.0662226174377005e-06, "loss": 0.551, "step": 12414 }, { "epoch": 0.79, "grad_norm": 1.6541094659688385, "learning_rate": 1.0655828727129319e-06, "loss": 0.6866, "step": 12415 }, { "epoch": 0.79, "grad_norm": 1.4244297848238914, "learning_rate": 1.0649432970811434e-06, "loss": 0.6007, "step": 12416 }, { "epoch": 0.79, "grad_norm": 2.787724498109962, "learning_rate": 1.0643038905698239e-06, "loss": 0.7196, "step": 12417 }, { "epoch": 0.79, "grad_norm": 1.7753454340748094, "learning_rate": 1.0636646532064531e-06, "loss": 0.7867, "step": 12418 }, { "epoch": 0.79, "grad_norm": 1.7720688700595928, "learning_rate": 1.0630255850185024e-06, "loss": 0.722, "step": 12419 }, { "epoch": 0.79, "grad_norm": 1.565459112128491, "learning_rate": 1.0623866860334425e-06, "loss": 0.6339, "step": 12420 }, { "epoch": 0.8, "grad_norm": 1.6345218564077248, "learning_rate": 1.0617479562787258e-06, "loss": 0.5907, "step": 12421 }, { "epoch": 0.8, "grad_norm": 1.6095030693115153, "learning_rate": 1.061109395781808e-06, "loss": 0.645, "step": 12422 }, { "epoch": 0.8, "grad_norm": 1.4644697627422112, "learning_rate": 1.060471004570131e-06, "loss": 0.6884, "step": 12423 }, { "epoch": 0.8, "grad_norm": 1.7786218168440038, "learning_rate": 1.059832782671133e-06, "loss": 0.6514, "step": 12424 }, { "epoch": 0.8, "grad_norm": 1.6542974177893726, "learning_rate": 1.0591947301122424e-06, "loss": 0.6963, "step": 12425 }, { "epoch": 0.8, "grad_norm": 1.233585347634611, "learning_rate": 1.0585568469208795e-06, "loss": 0.6387, "step": 12426 }, { "epoch": 0.8, "grad_norm": 1.6763095928309397, "learning_rate": 1.0579191331244648e-06, "loss": 0.6452, "step": 12427 }, { "epoch": 0.8, "grad_norm": 1.6098260646373999, "learning_rate": 1.057281588750399e-06, "loss": 0.5744, "step": 12428 }, { "epoch": 0.8, "grad_norm": 1.633262666070293, "learning_rate": 1.0566442138260885e-06, "loss": 0.6653, "step": 12429 }, { "epoch": 0.8, "grad_norm": 1.6995060815403344, "learning_rate": 1.0560070083789214e-06, "loss": 0.7087, "step": 12430 }, { "epoch": 0.8, "grad_norm": 1.574282493703948, "learning_rate": 1.0553699724362866e-06, "loss": 0.5839, "step": 12431 }, { "epoch": 0.8, "grad_norm": 1.6606777963839234, "learning_rate": 1.0547331060255612e-06, "loss": 0.7, "step": 12432 }, { "epoch": 0.8, "grad_norm": 1.5053794232186797, "learning_rate": 1.0540964091741157e-06, "loss": 0.5975, "step": 12433 }, { "epoch": 0.8, "grad_norm": 1.633412645771372, "learning_rate": 1.0534598819093178e-06, "loss": 0.6261, "step": 12434 }, { "epoch": 0.8, "grad_norm": 1.6751071862789475, "learning_rate": 1.0528235242585188e-06, "loss": 0.6641, "step": 12435 }, { "epoch": 0.8, "grad_norm": 0.9463022096213022, "learning_rate": 1.052187336249073e-06, "loss": 0.6284, "step": 12436 }, { "epoch": 0.8, "grad_norm": 1.961519550847746, "learning_rate": 1.0515513179083176e-06, "loss": 0.7183, "step": 12437 }, { "epoch": 0.8, "grad_norm": 1.5565925376352097, "learning_rate": 1.0509154692635908e-06, "loss": 0.6343, "step": 12438 }, { "epoch": 0.8, "grad_norm": 1.6188373291155822, "learning_rate": 1.050279790342219e-06, "loss": 0.6108, "step": 12439 }, { "epoch": 0.8, "grad_norm": 2.5017325239489616, "learning_rate": 1.0496442811715224e-06, "loss": 0.637, "step": 12440 }, { "epoch": 0.8, "grad_norm": 1.4989526284638288, "learning_rate": 1.0490089417788135e-06, "loss": 0.6272, "step": 12441 }, { "epoch": 0.8, "grad_norm": 1.1618104625789007, "learning_rate": 1.0483737721913968e-06, "loss": 0.5791, "step": 12442 }, { "epoch": 0.8, "grad_norm": 1.5516310911359243, "learning_rate": 1.0477387724365752e-06, "loss": 0.6975, "step": 12443 }, { "epoch": 0.8, "grad_norm": 6.249347413935927, "learning_rate": 1.0471039425416329e-06, "loss": 0.7668, "step": 12444 }, { "epoch": 0.8, "grad_norm": 1.6129638178367878, "learning_rate": 1.0464692825338584e-06, "loss": 0.7045, "step": 12445 }, { "epoch": 0.8, "grad_norm": 1.4466982872053449, "learning_rate": 1.0458347924405266e-06, "loss": 0.6342, "step": 12446 }, { "epoch": 0.8, "grad_norm": 1.4596253285553757, "learning_rate": 1.045200472288907e-06, "loss": 0.5782, "step": 12447 }, { "epoch": 0.8, "grad_norm": 1.4225043241254647, "learning_rate": 1.0445663221062606e-06, "loss": 0.6739, "step": 12448 }, { "epoch": 0.8, "grad_norm": 1.5103198630483279, "learning_rate": 1.0439323419198422e-06, "loss": 0.6801, "step": 12449 }, { "epoch": 0.8, "grad_norm": 1.6677983461507961, "learning_rate": 1.0432985317568988e-06, "loss": 0.5763, "step": 12450 }, { "epoch": 0.8, "grad_norm": 1.5377714861104073, "learning_rate": 1.0426648916446692e-06, "loss": 0.7255, "step": 12451 }, { "epoch": 0.8, "grad_norm": 1.570777995125124, "learning_rate": 1.0420314216103883e-06, "loss": 0.5462, "step": 12452 }, { "epoch": 0.8, "grad_norm": 1.918865930153034, "learning_rate": 1.0413981216812802e-06, "loss": 0.7298, "step": 12453 }, { "epoch": 0.8, "grad_norm": 1.542554051546032, "learning_rate": 1.0407649918845624e-06, "loss": 0.5359, "step": 12454 }, { "epoch": 0.8, "grad_norm": 1.5337080836591444, "learning_rate": 1.0401320322474456e-06, "loss": 0.6128, "step": 12455 }, { "epoch": 0.8, "grad_norm": 1.9707084845034952, "learning_rate": 1.0394992427971334e-06, "loss": 0.7324, "step": 12456 }, { "epoch": 0.8, "grad_norm": 1.7138580804082073, "learning_rate": 1.0388666235608219e-06, "loss": 0.7502, "step": 12457 }, { "epoch": 0.8, "grad_norm": 1.5532412221970495, "learning_rate": 1.0382341745656994e-06, "loss": 0.6502, "step": 12458 }, { "epoch": 0.8, "grad_norm": 1.5172971912745716, "learning_rate": 1.0376018958389472e-06, "loss": 0.694, "step": 12459 }, { "epoch": 0.8, "grad_norm": 1.2704225769304986, "learning_rate": 1.0369697874077377e-06, "loss": 0.6842, "step": 12460 }, { "epoch": 0.8, "grad_norm": 1.1576645134213832, "learning_rate": 1.036337849299241e-06, "loss": 0.6606, "step": 12461 }, { "epoch": 0.8, "grad_norm": 1.595235304113107, "learning_rate": 1.0357060815406146e-06, "loss": 0.6243, "step": 12462 }, { "epoch": 0.8, "grad_norm": 2.0603063977358307, "learning_rate": 1.0350744841590106e-06, "loss": 0.6857, "step": 12463 }, { "epoch": 0.8, "grad_norm": 1.4643702802451197, "learning_rate": 1.0344430571815734e-06, "loss": 0.6346, "step": 12464 }, { "epoch": 0.8, "grad_norm": 1.4729658726560775, "learning_rate": 1.0338118006354413e-06, "loss": 0.66, "step": 12465 }, { "epoch": 0.8, "grad_norm": 1.4347686678738978, "learning_rate": 1.033180714547744e-06, "loss": 0.6258, "step": 12466 }, { "epoch": 0.8, "grad_norm": 1.6058181454002416, "learning_rate": 1.032549798945604e-06, "loss": 0.6227, "step": 12467 }, { "epoch": 0.8, "grad_norm": 1.3894952411168862, "learning_rate": 1.0319190538561358e-06, "loss": 0.6249, "step": 12468 }, { "epoch": 0.8, "grad_norm": 1.4665441509891781, "learning_rate": 1.0312884793064493e-06, "loss": 0.6848, "step": 12469 }, { "epoch": 0.8, "grad_norm": 1.7532503883686674, "learning_rate": 1.030658075323645e-06, "loss": 0.6845, "step": 12470 }, { "epoch": 0.8, "grad_norm": 1.7570184261034212, "learning_rate": 1.0300278419348158e-06, "loss": 0.6299, "step": 12471 }, { "epoch": 0.8, "grad_norm": 1.7276500573348699, "learning_rate": 1.029397779167048e-06, "loss": 0.6102, "step": 12472 }, { "epoch": 0.8, "grad_norm": 2.0848260570438444, "learning_rate": 1.02876788704742e-06, "loss": 0.718, "step": 12473 }, { "epoch": 0.8, "grad_norm": 1.1120155252575405, "learning_rate": 1.0281381656030038e-06, "loss": 0.7353, "step": 12474 }, { "epoch": 0.8, "grad_norm": 1.9828711482427492, "learning_rate": 1.0275086148608614e-06, "loss": 0.7617, "step": 12475 }, { "epoch": 0.8, "grad_norm": 1.477239093640486, "learning_rate": 1.026879234848055e-06, "loss": 0.6489, "step": 12476 }, { "epoch": 0.8, "grad_norm": 1.4433690877884957, "learning_rate": 1.026250025591627e-06, "loss": 0.6956, "step": 12477 }, { "epoch": 0.8, "grad_norm": 1.4855625399879782, "learning_rate": 1.025620987118624e-06, "loss": 0.7142, "step": 12478 }, { "epoch": 0.8, "grad_norm": 1.5971337178033187, "learning_rate": 1.0249921194560803e-06, "loss": 0.6214, "step": 12479 }, { "epoch": 0.8, "grad_norm": 1.5410068118968832, "learning_rate": 1.0243634226310224e-06, "loss": 0.6483, "step": 12480 }, { "epoch": 0.8, "grad_norm": 1.491100322427246, "learning_rate": 1.0237348966704708e-06, "loss": 0.674, "step": 12481 }, { "epoch": 0.8, "grad_norm": 1.4840340942936474, "learning_rate": 1.0231065416014363e-06, "loss": 0.6215, "step": 12482 }, { "epoch": 0.8, "grad_norm": 2.013105622052509, "learning_rate": 1.022478357450929e-06, "loss": 0.6093, "step": 12483 }, { "epoch": 0.8, "grad_norm": 2.1399910373678943, "learning_rate": 1.021850344245941e-06, "loss": 0.725, "step": 12484 }, { "epoch": 0.8, "grad_norm": 1.4666266641821046, "learning_rate": 1.0212225020134693e-06, "loss": 0.683, "step": 12485 }, { "epoch": 0.8, "grad_norm": 1.0997297972634976, "learning_rate": 1.0205948307804904e-06, "loss": 0.5727, "step": 12486 }, { "epoch": 0.8, "grad_norm": 1.5938051861260099, "learning_rate": 1.0199673305739854e-06, "loss": 0.6934, "step": 12487 }, { "epoch": 0.8, "grad_norm": 1.5941202929077674, "learning_rate": 1.0193400014209215e-06, "loss": 0.6581, "step": 12488 }, { "epoch": 0.8, "grad_norm": 1.6204948964243147, "learning_rate": 1.0187128433482601e-06, "loss": 0.5524, "step": 12489 }, { "epoch": 0.8, "grad_norm": 1.8208003317138435, "learning_rate": 1.0180858563829544e-06, "loss": 0.6772, "step": 12490 }, { "epoch": 0.8, "grad_norm": 1.5579827304348115, "learning_rate": 1.0174590405519502e-06, "loss": 0.6447, "step": 12491 }, { "epoch": 0.8, "grad_norm": 1.6794858535395474, "learning_rate": 1.0168323958821908e-06, "loss": 0.7139, "step": 12492 }, { "epoch": 0.8, "grad_norm": 1.5977470359303576, "learning_rate": 1.0162059224006027e-06, "loss": 0.5483, "step": 12493 }, { "epoch": 0.8, "grad_norm": 1.5137039167220145, "learning_rate": 1.015579620134114e-06, "loss": 0.6062, "step": 12494 }, { "epoch": 0.8, "grad_norm": 1.102537164135865, "learning_rate": 1.0149534891096408e-06, "loss": 0.6421, "step": 12495 }, { "epoch": 0.8, "grad_norm": 2.057484453179702, "learning_rate": 1.0143275293540928e-06, "loss": 0.6889, "step": 12496 }, { "epoch": 0.8, "grad_norm": 1.559788555570263, "learning_rate": 1.0137017408943729e-06, "loss": 0.6482, "step": 12497 }, { "epoch": 0.8, "grad_norm": 1.585235212099752, "learning_rate": 1.0130761237573739e-06, "loss": 0.6523, "step": 12498 }, { "epoch": 0.8, "grad_norm": 1.3714913310534873, "learning_rate": 1.0124506779699882e-06, "loss": 0.6479, "step": 12499 }, { "epoch": 0.8, "grad_norm": 1.4804493296633554, "learning_rate": 1.0118254035590912e-06, "loss": 0.6491, "step": 12500 }, { "epoch": 0.8, "grad_norm": 1.563979083384002, "learning_rate": 1.0112003005515603e-06, "loss": 0.6221, "step": 12501 }, { "epoch": 0.8, "grad_norm": 1.810158394102471, "learning_rate": 1.0105753689742564e-06, "loss": 0.6433, "step": 12502 }, { "epoch": 0.8, "grad_norm": 1.449702313103763, "learning_rate": 1.0099506088540418e-06, "loss": 0.6173, "step": 12503 }, { "epoch": 0.8, "grad_norm": 1.5257843952201828, "learning_rate": 1.0093260202177651e-06, "loss": 0.6315, "step": 12504 }, { "epoch": 0.8, "grad_norm": 1.8516244574923477, "learning_rate": 1.0087016030922709e-06, "loss": 0.7176, "step": 12505 }, { "epoch": 0.8, "grad_norm": 1.7444159394250487, "learning_rate": 1.008077357504395e-06, "loss": 0.7417, "step": 12506 }, { "epoch": 0.8, "grad_norm": 1.7956423567097441, "learning_rate": 1.0074532834809647e-06, "loss": 0.6531, "step": 12507 }, { "epoch": 0.8, "grad_norm": 1.561078364385467, "learning_rate": 1.006829381048805e-06, "loss": 0.6486, "step": 12508 }, { "epoch": 0.8, "grad_norm": 1.056833687423757, "learning_rate": 1.0062056502347257e-06, "loss": 0.7409, "step": 12509 }, { "epoch": 0.8, "grad_norm": 1.9708781859340887, "learning_rate": 1.005582091065536e-06, "loss": 0.6162, "step": 12510 }, { "epoch": 0.8, "grad_norm": 1.1562764872318307, "learning_rate": 1.0049587035680353e-06, "loss": 0.6796, "step": 12511 }, { "epoch": 0.8, "grad_norm": 2.3237401305517555, "learning_rate": 1.0043354877690148e-06, "loss": 0.5373, "step": 12512 }, { "epoch": 0.8, "grad_norm": 1.7410825767236122, "learning_rate": 1.003712443695259e-06, "loss": 0.7069, "step": 12513 }, { "epoch": 0.8, "grad_norm": 1.721318704500957, "learning_rate": 1.0030895713735444e-06, "loss": 0.626, "step": 12514 }, { "epoch": 0.8, "grad_norm": 1.4252488042874778, "learning_rate": 1.0024668708306418e-06, "loss": 0.6049, "step": 12515 }, { "epoch": 0.8, "grad_norm": 1.656088951749966, "learning_rate": 1.0018443420933117e-06, "loss": 0.6879, "step": 12516 }, { "epoch": 0.8, "grad_norm": 1.2358493371976143, "learning_rate": 1.001221985188312e-06, "loss": 0.7561, "step": 12517 }, { "epoch": 0.8, "grad_norm": 1.8856666809329536, "learning_rate": 1.0005998001423883e-06, "loss": 0.7052, "step": 12518 }, { "epoch": 0.8, "grad_norm": 1.4569771621964565, "learning_rate": 9.99977786982282e-07, "loss": 0.6186, "step": 12519 }, { "epoch": 0.8, "grad_norm": 1.7999162381326437, "learning_rate": 9.993559457347245e-07, "loss": 0.6782, "step": 12520 }, { "epoch": 0.8, "grad_norm": 1.5657320715808503, "learning_rate": 9.987342764264424e-07, "loss": 0.6331, "step": 12521 }, { "epoch": 0.8, "grad_norm": 1.682048141999238, "learning_rate": 9.981127790841526e-07, "loss": 0.7476, "step": 12522 }, { "epoch": 0.8, "grad_norm": 1.4403916759498423, "learning_rate": 9.974914537345675e-07, "loss": 0.6958, "step": 12523 }, { "epoch": 0.8, "grad_norm": 4.985621540287556, "learning_rate": 9.968703004043873e-07, "loss": 0.6011, "step": 12524 }, { "epoch": 0.8, "grad_norm": 1.2773793673838716, "learning_rate": 9.96249319120311e-07, "loss": 0.6434, "step": 12525 }, { "epoch": 0.8, "grad_norm": 1.244824083996462, "learning_rate": 9.956285099090262e-07, "loss": 0.6257, "step": 12526 }, { "epoch": 0.8, "grad_norm": 1.0443205695592024, "learning_rate": 9.95007872797214e-07, "loss": 0.6099, "step": 12527 }, { "epoch": 0.8, "grad_norm": 1.5400019262245628, "learning_rate": 9.943874078115473e-07, "loss": 0.66, "step": 12528 }, { "epoch": 0.8, "grad_norm": 1.335338229570666, "learning_rate": 9.937671149786933e-07, "loss": 0.6956, "step": 12529 }, { "epoch": 0.8, "grad_norm": 1.4530921654423445, "learning_rate": 9.931469943253103e-07, "loss": 0.578, "step": 12530 }, { "epoch": 0.8, "grad_norm": 5.160722655218078, "learning_rate": 9.925270458780496e-07, "loss": 0.6527, "step": 12531 }, { "epoch": 0.8, "grad_norm": 1.5656382465572634, "learning_rate": 9.919072696635563e-07, "loss": 0.6455, "step": 12532 }, { "epoch": 0.8, "grad_norm": 1.862262900753776, "learning_rate": 9.91287665708464e-07, "loss": 0.6782, "step": 12533 }, { "epoch": 0.8, "grad_norm": 1.6175122744341832, "learning_rate": 9.906682340394064e-07, "loss": 0.7287, "step": 12534 }, { "epoch": 0.8, "grad_norm": 1.1228683351591375, "learning_rate": 9.900489746830034e-07, "loss": 0.6551, "step": 12535 }, { "epoch": 0.8, "grad_norm": 1.7463714213234405, "learning_rate": 9.894298876658692e-07, "loss": 0.6556, "step": 12536 }, { "epoch": 0.8, "grad_norm": 1.5414499872051872, "learning_rate": 9.888109730146112e-07, "loss": 0.6457, "step": 12537 }, { "epoch": 0.8, "grad_norm": 1.3955235431633288, "learning_rate": 9.88192230755829e-07, "loss": 0.702, "step": 12538 }, { "epoch": 0.8, "grad_norm": 1.6319488202283452, "learning_rate": 9.87573660916114e-07, "loss": 0.6631, "step": 12539 }, { "epoch": 0.8, "grad_norm": 1.096631162308887, "learning_rate": 9.869552635220515e-07, "loss": 0.6426, "step": 12540 }, { "epoch": 0.8, "grad_norm": 1.4377798005323743, "learning_rate": 9.863370386002214e-07, "loss": 0.6366, "step": 12541 }, { "epoch": 0.8, "grad_norm": 1.5591718607972433, "learning_rate": 9.857189861771887e-07, "loss": 0.6631, "step": 12542 }, { "epoch": 0.8, "grad_norm": 1.2237729710078837, "learning_rate": 9.851011062795201e-07, "loss": 0.6143, "step": 12543 }, { "epoch": 0.8, "grad_norm": 1.6570668991315451, "learning_rate": 9.8448339893377e-07, "loss": 0.6972, "step": 12544 }, { "epoch": 0.8, "grad_norm": 1.5141667480749506, "learning_rate": 9.83865864166485e-07, "loss": 0.6868, "step": 12545 }, { "epoch": 0.8, "grad_norm": 1.8042511935335623, "learning_rate": 9.832485020042065e-07, "loss": 0.7326, "step": 12546 }, { "epoch": 0.8, "grad_norm": 1.308846114094907, "learning_rate": 9.826313124734654e-07, "loss": 0.6115, "step": 12547 }, { "epoch": 0.8, "grad_norm": 1.3038617697146389, "learning_rate": 9.820142956007917e-07, "loss": 0.6433, "step": 12548 }, { "epoch": 0.8, "grad_norm": 2.3988670153738654, "learning_rate": 9.813974514126977e-07, "loss": 0.5966, "step": 12549 }, { "epoch": 0.8, "grad_norm": 1.6794823054841308, "learning_rate": 9.807807799357e-07, "loss": 0.5752, "step": 12550 }, { "epoch": 0.8, "grad_norm": 1.472319175049075, "learning_rate": 9.801642811962964e-07, "loss": 0.6483, "step": 12551 }, { "epoch": 0.8, "grad_norm": 1.6436124790631044, "learning_rate": 9.795479552209857e-07, "loss": 0.727, "step": 12552 }, { "epoch": 0.8, "grad_norm": 1.5277661122322288, "learning_rate": 9.789318020362564e-07, "loss": 0.5902, "step": 12553 }, { "epoch": 0.8, "grad_norm": 1.4302664163975793, "learning_rate": 9.783158216685874e-07, "loss": 0.539, "step": 12554 }, { "epoch": 0.8, "grad_norm": 1.5340473279291271, "learning_rate": 9.777000141444564e-07, "loss": 0.6655, "step": 12555 }, { "epoch": 0.8, "grad_norm": 1.643862157512923, "learning_rate": 9.77084379490324e-07, "loss": 0.6426, "step": 12556 }, { "epoch": 0.8, "grad_norm": 1.5902483847371767, "learning_rate": 9.764689177326542e-07, "loss": 0.6658, "step": 12557 }, { "epoch": 0.8, "grad_norm": 1.5643951864771823, "learning_rate": 9.758536288978932e-07, "loss": 0.7174, "step": 12558 }, { "epoch": 0.8, "grad_norm": 1.4892913382901825, "learning_rate": 9.75238513012488e-07, "loss": 0.6836, "step": 12559 }, { "epoch": 0.8, "grad_norm": 1.4740665931781003, "learning_rate": 9.74623570102875e-07, "loss": 0.6895, "step": 12560 }, { "epoch": 0.8, "grad_norm": 1.6118470007901382, "learning_rate": 9.74008800195481e-07, "loss": 0.7106, "step": 12561 }, { "epoch": 0.8, "grad_norm": 1.5453632288424475, "learning_rate": 9.73394203316732e-07, "loss": 0.6167, "step": 12562 }, { "epoch": 0.8, "grad_norm": 1.5316836001997742, "learning_rate": 9.727797794930361e-07, "loss": 0.6745, "step": 12563 }, { "epoch": 0.8, "grad_norm": 1.1198153641608288, "learning_rate": 9.721655287508052e-07, "loss": 0.612, "step": 12564 }, { "epoch": 0.8, "grad_norm": 1.3381895471450886, "learning_rate": 9.715514511164343e-07, "loss": 0.6081, "step": 12565 }, { "epoch": 0.8, "grad_norm": 1.5051249106947326, "learning_rate": 9.709375466163178e-07, "loss": 0.6512, "step": 12566 }, { "epoch": 0.8, "grad_norm": 1.1001430943849628, "learning_rate": 9.703238152768402e-07, "loss": 0.6609, "step": 12567 }, { "epoch": 0.8, "grad_norm": 1.5098566802470486, "learning_rate": 9.697102571243767e-07, "loss": 0.6204, "step": 12568 }, { "epoch": 0.8, "grad_norm": 1.0280756604146783, "learning_rate": 9.690968721852978e-07, "loss": 0.6337, "step": 12569 }, { "epoch": 0.8, "grad_norm": 1.690653858347582, "learning_rate": 9.684836604859637e-07, "loss": 0.7324, "step": 12570 }, { "epoch": 0.8, "grad_norm": 1.7554345669637088, "learning_rate": 9.678706220527333e-07, "loss": 0.5856, "step": 12571 }, { "epoch": 0.8, "grad_norm": 1.8885739518058937, "learning_rate": 9.672577569119484e-07, "loss": 0.6787, "step": 12572 }, { "epoch": 0.8, "grad_norm": 0.9995348162233918, "learning_rate": 9.666450650899533e-07, "loss": 0.5289, "step": 12573 }, { "epoch": 0.8, "grad_norm": 1.6179786548864001, "learning_rate": 9.66032546613076e-07, "loss": 0.636, "step": 12574 }, { "epoch": 0.8, "grad_norm": 1.5240780510995782, "learning_rate": 9.654202015076442e-07, "loss": 0.6058, "step": 12575 }, { "epoch": 0.8, "grad_norm": 3.231592541990182, "learning_rate": 9.648080297999746e-07, "loss": 0.6636, "step": 12576 }, { "epoch": 0.81, "grad_norm": 1.584406831040319, "learning_rate": 9.641960315163768e-07, "loss": 0.6732, "step": 12577 }, { "epoch": 0.81, "grad_norm": 1.6083004276683515, "learning_rate": 9.63584206683153e-07, "loss": 0.707, "step": 12578 }, { "epoch": 0.81, "grad_norm": 1.6238296683984206, "learning_rate": 9.629725553265983e-07, "loss": 0.5945, "step": 12579 }, { "epoch": 0.81, "grad_norm": 1.4990470534411977, "learning_rate": 9.623610774730002e-07, "loss": 0.6777, "step": 12580 }, { "epoch": 0.81, "grad_norm": 1.6556947699850675, "learning_rate": 9.617497731486374e-07, "loss": 0.7434, "step": 12581 }, { "epoch": 0.81, "grad_norm": 1.463312368541714, "learning_rate": 9.611386423797852e-07, "loss": 0.6272, "step": 12582 }, { "epoch": 0.81, "grad_norm": 1.1389733712847525, "learning_rate": 9.605276851927075e-07, "loss": 0.6218, "step": 12583 }, { "epoch": 0.81, "grad_norm": 1.5273044628193342, "learning_rate": 9.599169016136617e-07, "loss": 0.6458, "step": 12584 }, { "epoch": 0.81, "grad_norm": 1.5271630902158138, "learning_rate": 9.593062916688982e-07, "loss": 0.7868, "step": 12585 }, { "epoch": 0.81, "grad_norm": 1.6123024167608733, "learning_rate": 9.586958553846592e-07, "loss": 0.6406, "step": 12586 }, { "epoch": 0.81, "grad_norm": 1.5828953369903462, "learning_rate": 9.580855927871808e-07, "loss": 0.6851, "step": 12587 }, { "epoch": 0.81, "grad_norm": 2.004981731762982, "learning_rate": 9.574755039026901e-07, "loss": 0.7817, "step": 12588 }, { "epoch": 0.81, "grad_norm": 1.7631242257704045, "learning_rate": 9.56865588757407e-07, "loss": 0.6861, "step": 12589 }, { "epoch": 0.81, "grad_norm": 1.5669348871923352, "learning_rate": 9.562558473775458e-07, "loss": 0.6459, "step": 12590 }, { "epoch": 0.81, "grad_norm": 1.8504617081873664, "learning_rate": 9.556462797893113e-07, "loss": 0.6877, "step": 12591 }, { "epoch": 0.81, "grad_norm": 1.754028463600359, "learning_rate": 9.550368860189013e-07, "loss": 0.6805, "step": 12592 }, { "epoch": 0.81, "grad_norm": 1.6902099696108948, "learning_rate": 9.544276660925067e-07, "loss": 0.661, "step": 12593 }, { "epoch": 0.81, "grad_norm": 1.3506970570609307, "learning_rate": 9.538186200363098e-07, "loss": 0.6039, "step": 12594 }, { "epoch": 0.81, "grad_norm": 1.608834679420956, "learning_rate": 9.532097478764862e-07, "loss": 0.6372, "step": 12595 }, { "epoch": 0.81, "grad_norm": 1.5767129548318146, "learning_rate": 9.526010496392029e-07, "loss": 0.7007, "step": 12596 }, { "epoch": 0.81, "grad_norm": 1.4963032344070384, "learning_rate": 9.519925253506246e-07, "loss": 0.6281, "step": 12597 }, { "epoch": 0.81, "grad_norm": 1.7486570601687677, "learning_rate": 9.513841750368991e-07, "loss": 0.7334, "step": 12598 }, { "epoch": 0.81, "grad_norm": 1.4829153028454771, "learning_rate": 9.507759987241755e-07, "loss": 0.6481, "step": 12599 }, { "epoch": 0.81, "grad_norm": 1.5073482604255388, "learning_rate": 9.501679964385907e-07, "loss": 0.6601, "step": 12600 }, { "epoch": 0.81, "grad_norm": 1.7053034693548694, "learning_rate": 9.495601682062755e-07, "loss": 0.5953, "step": 12601 }, { "epoch": 0.81, "grad_norm": 1.6565598236329635, "learning_rate": 9.489525140533534e-07, "loss": 0.6462, "step": 12602 }, { "epoch": 0.81, "grad_norm": 1.5040944434573464, "learning_rate": 9.483450340059386e-07, "loss": 0.6674, "step": 12603 }, { "epoch": 0.81, "grad_norm": 1.1146549735121856, "learning_rate": 9.477377280901428e-07, "loss": 0.5892, "step": 12604 }, { "epoch": 0.81, "grad_norm": 1.0253578182750958, "learning_rate": 9.47130596332062e-07, "loss": 0.6422, "step": 12605 }, { "epoch": 0.81, "grad_norm": 1.5850808719742246, "learning_rate": 9.465236387577947e-07, "loss": 0.6185, "step": 12606 }, { "epoch": 0.81, "grad_norm": 1.4939739638304947, "learning_rate": 9.459168553934211e-07, "loss": 0.6577, "step": 12607 }, { "epoch": 0.81, "grad_norm": 1.011745273848145, "learning_rate": 9.453102462650232e-07, "loss": 0.6212, "step": 12608 }, { "epoch": 0.81, "grad_norm": 1.6541413831148417, "learning_rate": 9.447038113986717e-07, "loss": 0.7779, "step": 12609 }, { "epoch": 0.81, "grad_norm": 1.5221367031697903, "learning_rate": 9.440975508204286e-07, "loss": 0.6916, "step": 12610 }, { "epoch": 0.81, "grad_norm": 1.1390385566347712, "learning_rate": 9.434914645563498e-07, "loss": 0.6534, "step": 12611 }, { "epoch": 0.81, "grad_norm": 1.539600073207873, "learning_rate": 9.428855526324831e-07, "loss": 0.705, "step": 12612 }, { "epoch": 0.81, "grad_norm": 1.3627614427422845, "learning_rate": 9.422798150748724e-07, "loss": 0.6399, "step": 12613 }, { "epoch": 0.81, "grad_norm": 1.7792209747076848, "learning_rate": 9.416742519095467e-07, "loss": 0.6166, "step": 12614 }, { "epoch": 0.81, "grad_norm": 1.612825749578959, "learning_rate": 9.410688631625364e-07, "loss": 0.7349, "step": 12615 }, { "epoch": 0.81, "grad_norm": 1.7192593948597323, "learning_rate": 9.404636488598545e-07, "loss": 0.7698, "step": 12616 }, { "epoch": 0.81, "grad_norm": 1.498281776290097, "learning_rate": 9.398586090275164e-07, "loss": 0.5484, "step": 12617 }, { "epoch": 0.81, "grad_norm": 1.5987824139732938, "learning_rate": 9.392537436915234e-07, "loss": 0.695, "step": 12618 }, { "epoch": 0.81, "grad_norm": 1.837775558448974, "learning_rate": 9.386490528778702e-07, "loss": 0.615, "step": 12619 }, { "epoch": 0.81, "grad_norm": 1.7918982205468623, "learning_rate": 9.380445366125496e-07, "loss": 0.6499, "step": 12620 }, { "epoch": 0.81, "grad_norm": 1.4923382864638355, "learning_rate": 9.374401949215367e-07, "loss": 0.6918, "step": 12621 }, { "epoch": 0.81, "grad_norm": 1.4015150376916912, "learning_rate": 9.368360278308103e-07, "loss": 0.6004, "step": 12622 }, { "epoch": 0.81, "grad_norm": 1.6477598824242763, "learning_rate": 9.362320353663313e-07, "loss": 0.7317, "step": 12623 }, { "epoch": 0.81, "grad_norm": 1.7711755340085875, "learning_rate": 9.356282175540609e-07, "loss": 0.6818, "step": 12624 }, { "epoch": 0.81, "grad_norm": 1.916103074846992, "learning_rate": 9.350245744199499e-07, "loss": 0.7042, "step": 12625 }, { "epoch": 0.81, "grad_norm": 1.773349714699662, "learning_rate": 9.344211059899394e-07, "loss": 0.6907, "step": 12626 }, { "epoch": 0.81, "grad_norm": 1.6522331555660679, "learning_rate": 9.338178122899693e-07, "loss": 0.6007, "step": 12627 }, { "epoch": 0.81, "grad_norm": 1.4952211579984223, "learning_rate": 9.332146933459629e-07, "loss": 0.6467, "step": 12628 }, { "epoch": 0.81, "grad_norm": 0.9949120570676127, "learning_rate": 9.32611749183846e-07, "loss": 0.6538, "step": 12629 }, { "epoch": 0.81, "grad_norm": 1.6645311779989929, "learning_rate": 9.320089798295268e-07, "loss": 0.6067, "step": 12630 }, { "epoch": 0.81, "grad_norm": 1.7373623879998543, "learning_rate": 9.31406385308915e-07, "loss": 0.6845, "step": 12631 }, { "epoch": 0.81, "grad_norm": 1.5067381687253454, "learning_rate": 9.308039656479073e-07, "loss": 0.6284, "step": 12632 }, { "epoch": 0.81, "grad_norm": 1.8306011698212066, "learning_rate": 9.302017208723951e-07, "loss": 0.5909, "step": 12633 }, { "epoch": 0.81, "grad_norm": 1.5841397462112639, "learning_rate": 9.295996510082605e-07, "loss": 0.6397, "step": 12634 }, { "epoch": 0.81, "grad_norm": 1.4759911096834986, "learning_rate": 9.289977560813789e-07, "loss": 0.5645, "step": 12635 }, { "epoch": 0.81, "grad_norm": 1.7426294257889763, "learning_rate": 9.28396036117622e-07, "loss": 0.6226, "step": 12636 }, { "epoch": 0.81, "grad_norm": 1.4653128914715414, "learning_rate": 9.27794491142845e-07, "loss": 0.7322, "step": 12637 }, { "epoch": 0.81, "grad_norm": 2.3954544391132293, "learning_rate": 9.271931211829055e-07, "loss": 0.6857, "step": 12638 }, { "epoch": 0.81, "grad_norm": 1.5823244171451933, "learning_rate": 9.265919262636469e-07, "loss": 0.6418, "step": 12639 }, { "epoch": 0.81, "grad_norm": 1.5617480010265792, "learning_rate": 9.259909064109085e-07, "loss": 0.6282, "step": 12640 }, { "epoch": 0.81, "grad_norm": 1.6421314450315583, "learning_rate": 9.253900616505202e-07, "loss": 0.6944, "step": 12641 }, { "epoch": 0.81, "grad_norm": 1.5523471807885163, "learning_rate": 9.247893920083045e-07, "loss": 0.6075, "step": 12642 }, { "epoch": 0.81, "grad_norm": 1.527059038191076, "learning_rate": 9.241888975100782e-07, "loss": 0.5872, "step": 12643 }, { "epoch": 0.81, "grad_norm": 1.9271013928651008, "learning_rate": 9.235885781816483e-07, "loss": 0.7269, "step": 12644 }, { "epoch": 0.81, "grad_norm": 1.475918431940373, "learning_rate": 9.229884340488149e-07, "loss": 0.6835, "step": 12645 }, { "epoch": 0.81, "grad_norm": 1.4615326607493953, "learning_rate": 9.223884651373722e-07, "loss": 0.6283, "step": 12646 }, { "epoch": 0.81, "grad_norm": 1.6390060415146752, "learning_rate": 9.217886714731056e-07, "loss": 0.7152, "step": 12647 }, { "epoch": 0.81, "grad_norm": 1.616521555581995, "learning_rate": 9.211890530817919e-07, "loss": 0.6717, "step": 12648 }, { "epoch": 0.81, "grad_norm": 1.5334874260336484, "learning_rate": 9.205896099892019e-07, "loss": 0.7141, "step": 12649 }, { "epoch": 0.81, "grad_norm": 1.562014508628147, "learning_rate": 9.199903422210988e-07, "loss": 0.6836, "step": 12650 }, { "epoch": 0.81, "grad_norm": 1.5172702393902435, "learning_rate": 9.193912498032376e-07, "loss": 0.6161, "step": 12651 }, { "epoch": 0.81, "grad_norm": 1.5103516928317955, "learning_rate": 9.187923327613651e-07, "loss": 0.689, "step": 12652 }, { "epoch": 0.81, "grad_norm": 1.5461472554099298, "learning_rate": 9.181935911212231e-07, "loss": 0.6459, "step": 12653 }, { "epoch": 0.81, "grad_norm": 1.8200138172924312, "learning_rate": 9.175950249085424e-07, "loss": 0.7988, "step": 12654 }, { "epoch": 0.81, "grad_norm": 1.488311203702566, "learning_rate": 9.169966341490499e-07, "loss": 0.5993, "step": 12655 }, { "epoch": 0.81, "grad_norm": 1.4211692612108142, "learning_rate": 9.163984188684627e-07, "loss": 0.6929, "step": 12656 }, { "epoch": 0.81, "grad_norm": 1.5010055485373188, "learning_rate": 9.158003790924908e-07, "loss": 0.6924, "step": 12657 }, { "epoch": 0.81, "grad_norm": 1.8153917214667874, "learning_rate": 9.152025148468363e-07, "loss": 0.6933, "step": 12658 }, { "epoch": 0.81, "grad_norm": 1.219137926102456, "learning_rate": 9.146048261571944e-07, "loss": 0.6714, "step": 12659 }, { "epoch": 0.81, "grad_norm": 1.6132040205129363, "learning_rate": 9.140073130492528e-07, "loss": 0.7666, "step": 12660 }, { "epoch": 0.81, "grad_norm": 1.9119618232032702, "learning_rate": 9.134099755486892e-07, "loss": 0.7221, "step": 12661 }, { "epoch": 0.81, "grad_norm": 1.4549591160231319, "learning_rate": 9.128128136811809e-07, "loss": 0.589, "step": 12662 }, { "epoch": 0.81, "grad_norm": 1.5198983727929156, "learning_rate": 9.122158274723863e-07, "loss": 0.6419, "step": 12663 }, { "epoch": 0.81, "grad_norm": 1.4324418241815575, "learning_rate": 9.116190169479678e-07, "loss": 0.6788, "step": 12664 }, { "epoch": 0.81, "grad_norm": 1.5217326075933901, "learning_rate": 9.110223821335723e-07, "loss": 0.6355, "step": 12665 }, { "epoch": 0.81, "grad_norm": 1.6137287431619507, "learning_rate": 9.104259230548435e-07, "loss": 0.6766, "step": 12666 }, { "epoch": 0.81, "grad_norm": 1.6318444776053638, "learning_rate": 9.098296397374146e-07, "loss": 0.7035, "step": 12667 }, { "epoch": 0.81, "grad_norm": 1.430829084929639, "learning_rate": 9.092335322069118e-07, "loss": 0.6692, "step": 12668 }, { "epoch": 0.81, "grad_norm": 1.5923612419755215, "learning_rate": 9.086376004889591e-07, "loss": 0.6894, "step": 12669 }, { "epoch": 0.81, "grad_norm": 1.5356423683723885, "learning_rate": 9.080418446091622e-07, "loss": 0.6138, "step": 12670 }, { "epoch": 0.81, "grad_norm": 1.4978681371746436, "learning_rate": 9.074462645931309e-07, "loss": 0.5915, "step": 12671 }, { "epoch": 0.81, "grad_norm": 1.5757687726987257, "learning_rate": 9.068508604664572e-07, "loss": 0.7358, "step": 12672 }, { "epoch": 0.81, "grad_norm": 1.490808544740146, "learning_rate": 9.062556322547333e-07, "loss": 0.644, "step": 12673 }, { "epoch": 0.81, "grad_norm": 1.4962758176677933, "learning_rate": 9.056605799835411e-07, "loss": 0.7414, "step": 12674 }, { "epoch": 0.81, "grad_norm": 1.8193338173700002, "learning_rate": 9.050657036784516e-07, "loss": 0.7918, "step": 12675 }, { "epoch": 0.81, "grad_norm": 1.1492644341520288, "learning_rate": 9.044710033650367e-07, "loss": 0.5456, "step": 12676 }, { "epoch": 0.81, "grad_norm": 1.4416916144195306, "learning_rate": 9.038764790688492e-07, "loss": 0.616, "step": 12677 }, { "epoch": 0.81, "grad_norm": 1.5024139795832574, "learning_rate": 9.032821308154465e-07, "loss": 0.6095, "step": 12678 }, { "epoch": 0.81, "grad_norm": 1.9940917093624069, "learning_rate": 9.026879586303666e-07, "loss": 0.6335, "step": 12679 }, { "epoch": 0.81, "grad_norm": 1.6178571185020472, "learning_rate": 9.0209396253915e-07, "loss": 0.6158, "step": 12680 }, { "epoch": 0.81, "grad_norm": 1.51234944738063, "learning_rate": 9.015001425673242e-07, "loss": 0.7065, "step": 12681 }, { "epoch": 0.81, "grad_norm": 1.131305943539403, "learning_rate": 9.009064987404098e-07, "loss": 0.6742, "step": 12682 }, { "epoch": 0.81, "grad_norm": 1.36965557566452, "learning_rate": 9.003130310839203e-07, "loss": 0.7255, "step": 12683 }, { "epoch": 0.81, "grad_norm": 1.267247994589405, "learning_rate": 8.997197396233615e-07, "loss": 0.6866, "step": 12684 }, { "epoch": 0.81, "grad_norm": 1.0971247615379907, "learning_rate": 8.99126624384235e-07, "loss": 0.7756, "step": 12685 }, { "epoch": 0.81, "grad_norm": 1.6589906461211166, "learning_rate": 8.985336853920262e-07, "loss": 0.6789, "step": 12686 }, { "epoch": 0.81, "grad_norm": 1.72337205773964, "learning_rate": 8.979409226722224e-07, "loss": 0.6776, "step": 12687 }, { "epoch": 0.81, "grad_norm": 1.4602814647233417, "learning_rate": 8.973483362502983e-07, "loss": 0.67, "step": 12688 }, { "epoch": 0.81, "grad_norm": 1.703662086056881, "learning_rate": 8.967559261517217e-07, "loss": 0.6863, "step": 12689 }, { "epoch": 0.81, "grad_norm": 1.6192804719958664, "learning_rate": 8.961636924019534e-07, "loss": 0.7216, "step": 12690 }, { "epoch": 0.81, "grad_norm": 1.4847163503330802, "learning_rate": 8.955716350264454e-07, "loss": 0.7079, "step": 12691 }, { "epoch": 0.81, "grad_norm": 1.695507928415136, "learning_rate": 8.94979754050646e-07, "loss": 0.6092, "step": 12692 }, { "epoch": 0.81, "grad_norm": 1.585492318303441, "learning_rate": 8.943880494999884e-07, "loss": 0.5733, "step": 12693 }, { "epoch": 0.81, "grad_norm": 1.381148422765967, "learning_rate": 8.937965213999084e-07, "loss": 0.5482, "step": 12694 }, { "epoch": 0.81, "grad_norm": 1.2388531471274749, "learning_rate": 8.932051697758227e-07, "loss": 0.7461, "step": 12695 }, { "epoch": 0.81, "grad_norm": 1.6173516031936868, "learning_rate": 8.926139946531504e-07, "loss": 0.7054, "step": 12696 }, { "epoch": 0.81, "grad_norm": 1.7287729294023007, "learning_rate": 8.920229960572973e-07, "loss": 0.7515, "step": 12697 }, { "epoch": 0.81, "grad_norm": 1.5905831661509726, "learning_rate": 8.914321740136644e-07, "loss": 0.6906, "step": 12698 }, { "epoch": 0.81, "grad_norm": 1.6929786380863727, "learning_rate": 8.908415285476435e-07, "loss": 0.6602, "step": 12699 }, { "epoch": 0.81, "grad_norm": 1.431532945974284, "learning_rate": 8.902510596846176e-07, "loss": 0.6624, "step": 12700 }, { "epoch": 0.81, "grad_norm": 1.5827698316403862, "learning_rate": 8.89660767449968e-07, "loss": 0.7318, "step": 12701 }, { "epoch": 0.81, "grad_norm": 1.104933243117581, "learning_rate": 8.890706518690589e-07, "loss": 0.6041, "step": 12702 }, { "epoch": 0.81, "grad_norm": 1.559281723851078, "learning_rate": 8.884807129672568e-07, "loss": 0.7546, "step": 12703 }, { "epoch": 0.81, "grad_norm": 2.5541161164360227, "learning_rate": 8.878909507699135e-07, "loss": 0.7445, "step": 12704 }, { "epoch": 0.81, "grad_norm": 1.54398721176808, "learning_rate": 8.873013653023765e-07, "loss": 0.6375, "step": 12705 }, { "epoch": 0.81, "grad_norm": 1.6483189659329822, "learning_rate": 8.867119565899851e-07, "loss": 0.6637, "step": 12706 }, { "epoch": 0.81, "grad_norm": 1.4810328672207749, "learning_rate": 8.861227246580706e-07, "loss": 0.6926, "step": 12707 }, { "epoch": 0.81, "grad_norm": 1.5129001132140734, "learning_rate": 8.855336695319572e-07, "loss": 0.6341, "step": 12708 }, { "epoch": 0.81, "grad_norm": 1.4883199899009427, "learning_rate": 8.849447912369591e-07, "loss": 0.6234, "step": 12709 }, { "epoch": 0.81, "grad_norm": 1.7409798653203352, "learning_rate": 8.843560897983883e-07, "loss": 0.7473, "step": 12710 }, { "epoch": 0.81, "grad_norm": 1.4739741409380462, "learning_rate": 8.837675652415451e-07, "loss": 0.6857, "step": 12711 }, { "epoch": 0.81, "grad_norm": 1.4081808859346514, "learning_rate": 8.831792175917219e-07, "loss": 0.6274, "step": 12712 }, { "epoch": 0.81, "grad_norm": 1.719704123750189, "learning_rate": 8.82591046874206e-07, "loss": 0.641, "step": 12713 }, { "epoch": 0.81, "grad_norm": 1.4068587702162583, "learning_rate": 8.820030531142748e-07, "loss": 0.5854, "step": 12714 }, { "epoch": 0.81, "grad_norm": 1.5093731877234478, "learning_rate": 8.814152363371992e-07, "loss": 0.6398, "step": 12715 }, { "epoch": 0.81, "grad_norm": 1.5508172126819497, "learning_rate": 8.808275965682423e-07, "loss": 0.6831, "step": 12716 }, { "epoch": 0.81, "grad_norm": 1.3385682272360122, "learning_rate": 8.802401338326582e-07, "loss": 0.632, "step": 12717 }, { "epoch": 0.81, "grad_norm": 1.4878377042521829, "learning_rate": 8.796528481556992e-07, "loss": 0.6697, "step": 12718 }, { "epoch": 0.81, "grad_norm": 1.653247297639659, "learning_rate": 8.790657395626001e-07, "loss": 0.6541, "step": 12719 }, { "epoch": 0.81, "grad_norm": 1.5643210361757698, "learning_rate": 8.784788080785978e-07, "loss": 0.6984, "step": 12720 }, { "epoch": 0.81, "grad_norm": 1.1658611472713165, "learning_rate": 8.778920537289154e-07, "loss": 0.7256, "step": 12721 }, { "epoch": 0.81, "grad_norm": 1.5335879662406249, "learning_rate": 8.773054765387712e-07, "loss": 0.5537, "step": 12722 }, { "epoch": 0.81, "grad_norm": 1.6067047358166526, "learning_rate": 8.767190765333744e-07, "loss": 0.6905, "step": 12723 }, { "epoch": 0.81, "grad_norm": 1.5799090085371674, "learning_rate": 8.761328537379277e-07, "loss": 0.6231, "step": 12724 }, { "epoch": 0.81, "grad_norm": 1.166137462102895, "learning_rate": 8.755468081776252e-07, "loss": 0.6207, "step": 12725 }, { "epoch": 0.81, "grad_norm": 1.544854894492982, "learning_rate": 8.749609398776531e-07, "loss": 0.6618, "step": 12726 }, { "epoch": 0.81, "grad_norm": 1.6817606306137483, "learning_rate": 8.743752488631946e-07, "loss": 0.6489, "step": 12727 }, { "epoch": 0.81, "grad_norm": 1.001239707542365, "learning_rate": 8.73789735159416e-07, "loss": 0.6528, "step": 12728 }, { "epoch": 0.81, "grad_norm": 1.4710432294846847, "learning_rate": 8.732043987914856e-07, "loss": 0.5937, "step": 12729 }, { "epoch": 0.81, "grad_norm": 1.0745409476586312, "learning_rate": 8.726192397845585e-07, "loss": 0.551, "step": 12730 }, { "epoch": 0.81, "grad_norm": 1.497291018091601, "learning_rate": 8.720342581637836e-07, "loss": 0.6447, "step": 12731 }, { "epoch": 0.81, "grad_norm": 1.925674147052701, "learning_rate": 8.714494539543022e-07, "loss": 0.715, "step": 12732 }, { "epoch": 0.81, "grad_norm": 1.4952137782599453, "learning_rate": 8.708648271812469e-07, "loss": 0.5691, "step": 12733 }, { "epoch": 0.82, "grad_norm": 1.0629692136970337, "learning_rate": 8.702803778697472e-07, "loss": 0.7372, "step": 12734 }, { "epoch": 0.82, "grad_norm": 1.7097490176807244, "learning_rate": 8.696961060449166e-07, "loss": 0.6596, "step": 12735 }, { "epoch": 0.82, "grad_norm": 1.7816450845920082, "learning_rate": 8.691120117318708e-07, "loss": 0.7817, "step": 12736 }, { "epoch": 0.82, "grad_norm": 1.7184102391097198, "learning_rate": 8.685280949557084e-07, "loss": 0.6702, "step": 12737 }, { "epoch": 0.82, "grad_norm": 1.1475364672566084, "learning_rate": 8.67944355741528e-07, "loss": 0.7196, "step": 12738 }, { "epoch": 0.82, "grad_norm": 1.5733493069650566, "learning_rate": 8.673607941144169e-07, "loss": 0.6833, "step": 12739 }, { "epoch": 0.82, "grad_norm": 1.8683730467450104, "learning_rate": 8.667774100994536e-07, "loss": 0.6303, "step": 12740 }, { "epoch": 0.82, "grad_norm": 1.4807535552397368, "learning_rate": 8.661942037217141e-07, "loss": 0.6424, "step": 12741 }, { "epoch": 0.82, "grad_norm": 1.4076421376721413, "learning_rate": 8.656111750062596e-07, "loss": 0.5455, "step": 12742 }, { "epoch": 0.82, "grad_norm": 1.6743571615788182, "learning_rate": 8.650283239781515e-07, "loss": 0.6215, "step": 12743 }, { "epoch": 0.82, "grad_norm": 1.4032485789734312, "learning_rate": 8.644456506624343e-07, "loss": 0.5984, "step": 12744 }, { "epoch": 0.82, "grad_norm": 1.3753331589984352, "learning_rate": 8.638631550841553e-07, "loss": 0.6047, "step": 12745 }, { "epoch": 0.82, "grad_norm": 1.2182843925262241, "learning_rate": 8.63280837268346e-07, "loss": 0.7067, "step": 12746 }, { "epoch": 0.82, "grad_norm": 1.6419784793897287, "learning_rate": 8.626986972400326e-07, "loss": 0.6618, "step": 12747 }, { "epoch": 0.82, "grad_norm": 1.9720622851773089, "learning_rate": 8.621167350242382e-07, "loss": 0.7063, "step": 12748 }, { "epoch": 0.82, "grad_norm": 1.4070563281533537, "learning_rate": 8.615349506459691e-07, "loss": 0.6194, "step": 12749 }, { "epoch": 0.82, "grad_norm": 1.013357581519913, "learning_rate": 8.609533441302342e-07, "loss": 0.5568, "step": 12750 }, { "epoch": 0.82, "grad_norm": 1.5042387240210036, "learning_rate": 8.603719155020246e-07, "loss": 0.6375, "step": 12751 }, { "epoch": 0.82, "grad_norm": 1.5220054675910626, "learning_rate": 8.59790664786333e-07, "loss": 0.6189, "step": 12752 }, { "epoch": 0.82, "grad_norm": 1.6965299123295388, "learning_rate": 8.592095920081383e-07, "loss": 0.6646, "step": 12753 }, { "epoch": 0.82, "grad_norm": 1.2515703763669475, "learning_rate": 8.586286971924151e-07, "loss": 0.6659, "step": 12754 }, { "epoch": 0.82, "grad_norm": 1.665252482081839, "learning_rate": 8.580479803641279e-07, "loss": 0.6618, "step": 12755 }, { "epoch": 0.82, "grad_norm": 1.6543748687110207, "learning_rate": 8.574674415482337e-07, "loss": 0.6178, "step": 12756 }, { "epoch": 0.82, "grad_norm": 1.6081596362419648, "learning_rate": 8.568870807696872e-07, "loss": 0.6246, "step": 12757 }, { "epoch": 0.82, "grad_norm": 1.5733385738367245, "learning_rate": 8.56306898053425e-07, "loss": 0.6442, "step": 12758 }, { "epoch": 0.82, "grad_norm": 1.5164199389458657, "learning_rate": 8.557268934243868e-07, "loss": 0.7144, "step": 12759 }, { "epoch": 0.82, "grad_norm": 1.5958102788770372, "learning_rate": 8.551470669074985e-07, "loss": 0.6381, "step": 12760 }, { "epoch": 0.82, "grad_norm": 1.5711379161734356, "learning_rate": 8.545674185276792e-07, "loss": 0.6946, "step": 12761 }, { "epoch": 0.82, "grad_norm": 0.906360260383458, "learning_rate": 8.539879483098423e-07, "loss": 0.7285, "step": 12762 }, { "epoch": 0.82, "grad_norm": 1.4495857721806031, "learning_rate": 8.534086562788907e-07, "loss": 0.6469, "step": 12763 }, { "epoch": 0.82, "grad_norm": 1.4304060170612602, "learning_rate": 8.528295424597222e-07, "loss": 0.5892, "step": 12764 }, { "epoch": 0.82, "grad_norm": 2.3459820869755315, "learning_rate": 8.52250606877224e-07, "loss": 0.6475, "step": 12765 }, { "epoch": 0.82, "grad_norm": 1.8940564194884282, "learning_rate": 8.51671849556282e-07, "loss": 0.6162, "step": 12766 }, { "epoch": 0.82, "grad_norm": 1.476845170927127, "learning_rate": 8.510932705217645e-07, "loss": 0.6501, "step": 12767 }, { "epoch": 0.82, "grad_norm": 1.6815803136052434, "learning_rate": 8.50514869798541e-07, "loss": 0.6166, "step": 12768 }, { "epoch": 0.82, "grad_norm": 1.5780897394963977, "learning_rate": 8.499366474114695e-07, "loss": 0.6988, "step": 12769 }, { "epoch": 0.82, "grad_norm": 1.6540689258542147, "learning_rate": 8.493586033854007e-07, "loss": 0.709, "step": 12770 }, { "epoch": 0.82, "grad_norm": 1.6055033669101042, "learning_rate": 8.487807377451767e-07, "loss": 0.6907, "step": 12771 }, { "epoch": 0.82, "grad_norm": 1.7226796708411667, "learning_rate": 8.482030505156341e-07, "loss": 0.6823, "step": 12772 }, { "epoch": 0.82, "grad_norm": 1.4285758625457952, "learning_rate": 8.476255417216007e-07, "loss": 0.6298, "step": 12773 }, { "epoch": 0.82, "grad_norm": 1.681108966045372, "learning_rate": 8.470482113878942e-07, "loss": 0.7361, "step": 12774 }, { "epoch": 0.82, "grad_norm": 1.6593363462404238, "learning_rate": 8.464710595393306e-07, "loss": 0.6718, "step": 12775 }, { "epoch": 0.82, "grad_norm": 1.4414634446956103, "learning_rate": 8.458940862007131e-07, "loss": 0.6524, "step": 12776 }, { "epoch": 0.82, "grad_norm": 1.6445925426916865, "learning_rate": 8.453172913968382e-07, "loss": 0.5997, "step": 12777 }, { "epoch": 0.82, "grad_norm": 1.4777339702254382, "learning_rate": 8.447406751524967e-07, "loss": 0.75, "step": 12778 }, { "epoch": 0.82, "grad_norm": 1.19714573857844, "learning_rate": 8.441642374924692e-07, "loss": 0.7462, "step": 12779 }, { "epoch": 0.82, "grad_norm": 1.5218849217294064, "learning_rate": 8.435879784415302e-07, "loss": 0.5827, "step": 12780 }, { "epoch": 0.82, "grad_norm": 2.5001935209476103, "learning_rate": 8.430118980244462e-07, "loss": 0.6653, "step": 12781 }, { "epoch": 0.82, "grad_norm": 1.3348241960887213, "learning_rate": 8.424359962659745e-07, "loss": 0.6395, "step": 12782 }, { "epoch": 0.82, "grad_norm": 1.7064731864284775, "learning_rate": 8.418602731908687e-07, "loss": 0.6219, "step": 12783 }, { "epoch": 0.82, "grad_norm": 1.4901296319532367, "learning_rate": 8.412847288238712e-07, "loss": 0.5658, "step": 12784 }, { "epoch": 0.82, "grad_norm": 1.7894358013024851, "learning_rate": 8.407093631897168e-07, "loss": 0.598, "step": 12785 }, { "epoch": 0.82, "grad_norm": 1.5460707183857603, "learning_rate": 8.401341763131343e-07, "loss": 0.6506, "step": 12786 }, { "epoch": 0.82, "grad_norm": 1.5003936512539136, "learning_rate": 8.395591682188442e-07, "loss": 0.667, "step": 12787 }, { "epoch": 0.82, "grad_norm": 1.5829704906337496, "learning_rate": 8.389843389315582e-07, "loss": 0.6371, "step": 12788 }, { "epoch": 0.82, "grad_norm": 1.5547233305355574, "learning_rate": 8.384096884759807e-07, "loss": 0.6355, "step": 12789 }, { "epoch": 0.82, "grad_norm": 1.1705095431983106, "learning_rate": 8.378352168768128e-07, "loss": 0.6709, "step": 12790 }, { "epoch": 0.82, "grad_norm": 1.4890381594109894, "learning_rate": 8.372609241587387e-07, "loss": 0.6297, "step": 12791 }, { "epoch": 0.82, "grad_norm": 1.6938731285723199, "learning_rate": 8.366868103464453e-07, "loss": 0.6432, "step": 12792 }, { "epoch": 0.82, "grad_norm": 1.4825209957414776, "learning_rate": 8.361128754646025e-07, "loss": 0.6372, "step": 12793 }, { "epoch": 0.82, "grad_norm": 1.1551735666801461, "learning_rate": 8.355391195378798e-07, "loss": 0.6767, "step": 12794 }, { "epoch": 0.82, "grad_norm": 1.4528540970305046, "learning_rate": 8.349655425909348e-07, "loss": 0.6121, "step": 12795 }, { "epoch": 0.82, "grad_norm": 1.4984560948422556, "learning_rate": 8.343921446484177e-07, "loss": 0.6539, "step": 12796 }, { "epoch": 0.82, "grad_norm": 1.3984849852155317, "learning_rate": 8.338189257349755e-07, "loss": 0.6018, "step": 12797 }, { "epoch": 0.82, "grad_norm": 1.2694803008449216, "learning_rate": 8.332458858752391e-07, "loss": 0.6968, "step": 12798 }, { "epoch": 0.82, "grad_norm": 2.233933258001004, "learning_rate": 8.326730250938414e-07, "loss": 0.7755, "step": 12799 }, { "epoch": 0.82, "grad_norm": 1.8761229158042396, "learning_rate": 8.321003434153979e-07, "loss": 0.714, "step": 12800 }, { "epoch": 0.82, "grad_norm": 1.526294156362398, "learning_rate": 8.31527840864525e-07, "loss": 0.6743, "step": 12801 }, { "epoch": 0.82, "grad_norm": 1.295106199606758, "learning_rate": 8.309555174658263e-07, "loss": 0.7177, "step": 12802 }, { "epoch": 0.82, "grad_norm": 2.132354836058407, "learning_rate": 8.303833732438988e-07, "loss": 0.7131, "step": 12803 }, { "epoch": 0.82, "grad_norm": 1.7060927359969569, "learning_rate": 8.298114082233327e-07, "loss": 0.7022, "step": 12804 }, { "epoch": 0.82, "grad_norm": 1.0682181696877107, "learning_rate": 8.29239622428708e-07, "loss": 0.6435, "step": 12805 }, { "epoch": 0.82, "grad_norm": 1.4943415330626475, "learning_rate": 8.286680158846028e-07, "loss": 0.6483, "step": 12806 }, { "epoch": 0.82, "grad_norm": 2.0499464577283932, "learning_rate": 8.280965886155789e-07, "loss": 0.6565, "step": 12807 }, { "epoch": 0.82, "grad_norm": 2.0163082062991124, "learning_rate": 8.275253406461997e-07, "loss": 0.6269, "step": 12808 }, { "epoch": 0.82, "grad_norm": 1.539717618853155, "learning_rate": 8.26954272001011e-07, "loss": 0.7204, "step": 12809 }, { "epoch": 0.82, "grad_norm": 1.6349279017727532, "learning_rate": 8.263833827045603e-07, "loss": 0.6813, "step": 12810 }, { "epoch": 0.82, "grad_norm": 1.582337189384179, "learning_rate": 8.25812672781382e-07, "loss": 0.6282, "step": 12811 }, { "epoch": 0.82, "grad_norm": 1.6157312661066543, "learning_rate": 8.252421422560025e-07, "loss": 0.6678, "step": 12812 }, { "epoch": 0.82, "grad_norm": 1.605624893171032, "learning_rate": 8.246717911529456e-07, "loss": 0.715, "step": 12813 }, { "epoch": 0.82, "grad_norm": 1.6389680712794772, "learning_rate": 8.241016194967194e-07, "loss": 0.6032, "step": 12814 }, { "epoch": 0.82, "grad_norm": 1.3799486287965508, "learning_rate": 8.235316273118333e-07, "loss": 0.6564, "step": 12815 }, { "epoch": 0.82, "grad_norm": 1.6998100657666546, "learning_rate": 8.229618146227791e-07, "loss": 0.6314, "step": 12816 }, { "epoch": 0.82, "grad_norm": 1.8434718853349772, "learning_rate": 8.223921814540503e-07, "loss": 0.6739, "step": 12817 }, { "epoch": 0.82, "grad_norm": 1.4378854162395114, "learning_rate": 8.218227278301277e-07, "loss": 0.6284, "step": 12818 }, { "epoch": 0.82, "grad_norm": 1.5597860231387952, "learning_rate": 8.212534537754841e-07, "loss": 0.6402, "step": 12819 }, { "epoch": 0.82, "grad_norm": 1.631188827421167, "learning_rate": 8.206843593145864e-07, "loss": 0.6935, "step": 12820 }, { "epoch": 0.82, "grad_norm": 1.6955103249645629, "learning_rate": 8.201154444718917e-07, "loss": 0.6544, "step": 12821 }, { "epoch": 0.82, "grad_norm": 1.8328060350291646, "learning_rate": 8.195467092718546e-07, "loss": 0.751, "step": 12822 }, { "epoch": 0.82, "grad_norm": 1.7974850990489035, "learning_rate": 8.189781537389135e-07, "loss": 0.6461, "step": 12823 }, { "epoch": 0.82, "grad_norm": 1.3280729604022965, "learning_rate": 8.184097778975064e-07, "loss": 0.6241, "step": 12824 }, { "epoch": 0.82, "grad_norm": 1.5409304200717633, "learning_rate": 8.178415817720609e-07, "loss": 0.7182, "step": 12825 }, { "epoch": 0.82, "grad_norm": 1.2989638469202853, "learning_rate": 8.172735653869956e-07, "loss": 0.4968, "step": 12826 }, { "epoch": 0.82, "grad_norm": 1.5157551965832046, "learning_rate": 8.16705728766724e-07, "loss": 0.6374, "step": 12827 }, { "epoch": 0.82, "grad_norm": 1.5387790620802149, "learning_rate": 8.161380719356493e-07, "loss": 0.6918, "step": 12828 }, { "epoch": 0.82, "grad_norm": 1.5366009065384814, "learning_rate": 8.155705949181691e-07, "loss": 0.6533, "step": 12829 }, { "epoch": 0.82, "grad_norm": 1.633792073345595, "learning_rate": 8.150032977386707e-07, "loss": 0.7257, "step": 12830 }, { "epoch": 0.82, "grad_norm": 1.3487661938809725, "learning_rate": 8.144361804215384e-07, "loss": 0.6804, "step": 12831 }, { "epoch": 0.82, "grad_norm": 1.413538152062893, "learning_rate": 8.138692429911432e-07, "loss": 0.7423, "step": 12832 }, { "epoch": 0.82, "grad_norm": 1.6288079934771273, "learning_rate": 8.133024854718524e-07, "loss": 0.5802, "step": 12833 }, { "epoch": 0.82, "grad_norm": 1.717554707508415, "learning_rate": 8.127359078880226e-07, "loss": 0.7245, "step": 12834 }, { "epoch": 0.82, "grad_norm": 1.4277549119031843, "learning_rate": 8.121695102640053e-07, "loss": 0.5432, "step": 12835 }, { "epoch": 0.82, "grad_norm": 1.5553146450995812, "learning_rate": 8.116032926241424e-07, "loss": 0.6258, "step": 12836 }, { "epoch": 0.82, "grad_norm": 1.4812446808828958, "learning_rate": 8.110372549927692e-07, "loss": 0.6219, "step": 12837 }, { "epoch": 0.82, "grad_norm": 1.529546613619999, "learning_rate": 8.104713973942107e-07, "loss": 0.6653, "step": 12838 }, { "epoch": 0.82, "grad_norm": 1.5814215427634686, "learning_rate": 8.099057198527899e-07, "loss": 0.7156, "step": 12839 }, { "epoch": 0.82, "grad_norm": 1.2047239697107086, "learning_rate": 8.093402223928165e-07, "loss": 0.7155, "step": 12840 }, { "epoch": 0.82, "grad_norm": 1.4987916571367443, "learning_rate": 8.087749050385952e-07, "loss": 0.7117, "step": 12841 }, { "epoch": 0.82, "grad_norm": 1.8001588567348776, "learning_rate": 8.082097678144207e-07, "loss": 0.7348, "step": 12842 }, { "epoch": 0.82, "grad_norm": 1.6066829986892601, "learning_rate": 8.076448107445822e-07, "loss": 0.6273, "step": 12843 }, { "epoch": 0.82, "grad_norm": 1.478538803481497, "learning_rate": 8.070800338533608e-07, "loss": 0.62, "step": 12844 }, { "epoch": 0.82, "grad_norm": 0.9569609475111375, "learning_rate": 8.065154371650286e-07, "loss": 0.6449, "step": 12845 }, { "epoch": 0.82, "grad_norm": 1.669469624820584, "learning_rate": 8.059510207038517e-07, "loss": 0.6693, "step": 12846 }, { "epoch": 0.82, "grad_norm": 1.6940613330125065, "learning_rate": 8.053867844940855e-07, "loss": 0.6399, "step": 12847 }, { "epoch": 0.82, "grad_norm": 1.7731443544172565, "learning_rate": 8.048227285599825e-07, "loss": 0.7749, "step": 12848 }, { "epoch": 0.82, "grad_norm": 1.643070564867268, "learning_rate": 8.042588529257828e-07, "loss": 0.6405, "step": 12849 }, { "epoch": 0.82, "grad_norm": 1.7602843612616765, "learning_rate": 8.03695157615722e-07, "loss": 0.7321, "step": 12850 }, { "epoch": 0.82, "grad_norm": 1.4754264804109216, "learning_rate": 8.031316426540254e-07, "loss": 0.6929, "step": 12851 }, { "epoch": 0.82, "grad_norm": 2.050354569689909, "learning_rate": 8.025683080649116e-07, "loss": 0.639, "step": 12852 }, { "epoch": 0.82, "grad_norm": 1.0092281392250522, "learning_rate": 8.02005153872592e-07, "loss": 0.5794, "step": 12853 }, { "epoch": 0.82, "grad_norm": 1.5772948519715537, "learning_rate": 8.014421801012684e-07, "loss": 0.778, "step": 12854 }, { "epoch": 0.82, "grad_norm": 1.6185332757411854, "learning_rate": 8.008793867751402e-07, "loss": 0.6812, "step": 12855 }, { "epoch": 0.82, "grad_norm": 1.6561988175351223, "learning_rate": 8.003167739183903e-07, "loss": 0.6873, "step": 12856 }, { "epoch": 0.82, "grad_norm": 1.3968303757428413, "learning_rate": 7.997543415552011e-07, "loss": 0.6016, "step": 12857 }, { "epoch": 0.82, "grad_norm": 1.6163973880517934, "learning_rate": 7.991920897097449e-07, "loss": 0.6097, "step": 12858 }, { "epoch": 0.82, "grad_norm": 1.1239558247615564, "learning_rate": 7.986300184061857e-07, "loss": 0.703, "step": 12859 }, { "epoch": 0.82, "grad_norm": 1.5521438736546542, "learning_rate": 7.980681276686797e-07, "loss": 0.6918, "step": 12860 }, { "epoch": 0.82, "grad_norm": 1.502334962003217, "learning_rate": 7.975064175213748e-07, "loss": 0.5997, "step": 12861 }, { "epoch": 0.82, "grad_norm": 1.9062616323251227, "learning_rate": 7.969448879884162e-07, "loss": 0.5561, "step": 12862 }, { "epoch": 0.82, "grad_norm": 1.6465738952775164, "learning_rate": 7.963835390939317e-07, "loss": 0.5805, "step": 12863 }, { "epoch": 0.82, "grad_norm": 1.914640501410815, "learning_rate": 7.958223708620521e-07, "loss": 0.677, "step": 12864 }, { "epoch": 0.82, "grad_norm": 1.0608915786536806, "learning_rate": 7.952613833168909e-07, "loss": 0.6889, "step": 12865 }, { "epoch": 0.82, "grad_norm": 1.040474353033955, "learning_rate": 7.947005764825611e-07, "loss": 0.5874, "step": 12866 }, { "epoch": 0.82, "grad_norm": 1.1507937657120038, "learning_rate": 7.941399503831637e-07, "loss": 0.6435, "step": 12867 }, { "epoch": 0.82, "grad_norm": 1.6794540827739548, "learning_rate": 7.935795050427924e-07, "loss": 0.6686, "step": 12868 }, { "epoch": 0.82, "grad_norm": 1.7348443669016094, "learning_rate": 7.930192404855375e-07, "loss": 0.6999, "step": 12869 }, { "epoch": 0.82, "grad_norm": 1.381275214351722, "learning_rate": 7.924591567354728e-07, "loss": 0.5846, "step": 12870 }, { "epoch": 0.82, "grad_norm": 1.6461991776876785, "learning_rate": 7.918992538166753e-07, "loss": 0.6476, "step": 12871 }, { "epoch": 0.82, "grad_norm": 1.590940769660602, "learning_rate": 7.913395317532024e-07, "loss": 0.7218, "step": 12872 }, { "epoch": 0.82, "grad_norm": 1.5620607353777327, "learning_rate": 7.907799905691144e-07, "loss": 0.68, "step": 12873 }, { "epoch": 0.82, "grad_norm": 1.6343973795344688, "learning_rate": 7.90220630288458e-07, "loss": 0.6871, "step": 12874 }, { "epoch": 0.82, "grad_norm": 2.7153518674892467, "learning_rate": 7.896614509352724e-07, "loss": 0.802, "step": 12875 }, { "epoch": 0.82, "grad_norm": 1.6752004115921013, "learning_rate": 7.891024525335905e-07, "loss": 0.7254, "step": 12876 }, { "epoch": 0.82, "grad_norm": 1.4433901739769863, "learning_rate": 7.885436351074355e-07, "loss": 0.6922, "step": 12877 }, { "epoch": 0.82, "grad_norm": 1.4435400359277568, "learning_rate": 7.879849986808286e-07, "loss": 0.6473, "step": 12878 }, { "epoch": 0.82, "grad_norm": 1.2163859914520518, "learning_rate": 7.874265432777728e-07, "loss": 0.6184, "step": 12879 }, { "epoch": 0.82, "grad_norm": 1.5062288196139841, "learning_rate": 7.86868268922274e-07, "loss": 0.6308, "step": 12880 }, { "epoch": 0.82, "grad_norm": 1.6161193356662877, "learning_rate": 7.863101756383235e-07, "loss": 0.6982, "step": 12881 }, { "epoch": 0.82, "grad_norm": 1.499770932113532, "learning_rate": 7.857522634499082e-07, "loss": 0.6765, "step": 12882 }, { "epoch": 0.82, "grad_norm": 1.4276622611327436, "learning_rate": 7.851945323810045e-07, "loss": 0.6956, "step": 12883 }, { "epoch": 0.82, "grad_norm": 1.5070240573885143, "learning_rate": 7.846369824555838e-07, "loss": 0.6898, "step": 12884 }, { "epoch": 0.82, "grad_norm": 1.3174676078011973, "learning_rate": 7.840796136976075e-07, "loss": 0.6586, "step": 12885 }, { "epoch": 0.82, "grad_norm": 1.5136276598485516, "learning_rate": 7.835224261310293e-07, "loss": 0.6189, "step": 12886 }, { "epoch": 0.82, "grad_norm": 1.9338345191378201, "learning_rate": 7.829654197797998e-07, "loss": 0.6363, "step": 12887 }, { "epoch": 0.82, "grad_norm": 1.2362786730909552, "learning_rate": 7.824085946678534e-07, "loss": 0.7192, "step": 12888 }, { "epoch": 0.82, "grad_norm": 1.93096504946768, "learning_rate": 7.818519508191236e-07, "loss": 0.7196, "step": 12889 }, { "epoch": 0.83, "grad_norm": 1.4625283599413494, "learning_rate": 7.812954882575341e-07, "loss": 0.6102, "step": 12890 }, { "epoch": 0.83, "grad_norm": 1.5528646688509302, "learning_rate": 7.807392070069992e-07, "loss": 0.7187, "step": 12891 }, { "epoch": 0.83, "grad_norm": 2.513205046657216, "learning_rate": 7.801831070914279e-07, "loss": 0.6195, "step": 12892 }, { "epoch": 0.83, "grad_norm": 1.7689169114928711, "learning_rate": 7.796271885347189e-07, "loss": 0.6155, "step": 12893 }, { "epoch": 0.83, "grad_norm": 1.7644677347159994, "learning_rate": 7.790714513607656e-07, "loss": 0.6747, "step": 12894 }, { "epoch": 0.83, "grad_norm": 1.4492308602739739, "learning_rate": 7.785158955934508e-07, "loss": 0.6301, "step": 12895 }, { "epoch": 0.83, "grad_norm": 1.4430852741209126, "learning_rate": 7.779605212566533e-07, "loss": 0.7262, "step": 12896 }, { "epoch": 0.83, "grad_norm": 1.6967362566988429, "learning_rate": 7.774053283742406e-07, "loss": 0.6423, "step": 12897 }, { "epoch": 0.83, "grad_norm": 1.629354500762688, "learning_rate": 7.768503169700742e-07, "loss": 0.6971, "step": 12898 }, { "epoch": 0.83, "grad_norm": 1.4445322242663812, "learning_rate": 7.762954870680067e-07, "loss": 0.5791, "step": 12899 }, { "epoch": 0.83, "grad_norm": 1.6342934786120158, "learning_rate": 7.757408386918846e-07, "loss": 0.7183, "step": 12900 }, { "epoch": 0.83, "grad_norm": 1.7715486663500244, "learning_rate": 7.751863718655444e-07, "loss": 0.6446, "step": 12901 }, { "epoch": 0.83, "grad_norm": 1.577241625765583, "learning_rate": 7.746320866128171e-07, "loss": 0.8219, "step": 12902 }, { "epoch": 0.83, "grad_norm": 1.4938876071910838, "learning_rate": 7.740779829575218e-07, "loss": 0.7196, "step": 12903 }, { "epoch": 0.83, "grad_norm": 1.8252659806131484, "learning_rate": 7.735240609234767e-07, "loss": 0.6903, "step": 12904 }, { "epoch": 0.83, "grad_norm": 1.6017402445253304, "learning_rate": 7.729703205344863e-07, "loss": 0.5999, "step": 12905 }, { "epoch": 0.83, "grad_norm": 1.7007470681756718, "learning_rate": 7.724167618143497e-07, "loss": 0.6908, "step": 12906 }, { "epoch": 0.83, "grad_norm": 1.3510703064190963, "learning_rate": 7.718633847868568e-07, "loss": 0.8032, "step": 12907 }, { "epoch": 0.83, "grad_norm": 1.5480997974621389, "learning_rate": 7.713101894757913e-07, "loss": 0.6049, "step": 12908 }, { "epoch": 0.83, "grad_norm": 1.529562822459096, "learning_rate": 7.707571759049281e-07, "loss": 0.6873, "step": 12909 }, { "epoch": 0.83, "grad_norm": 1.463704163194718, "learning_rate": 7.702043440980333e-07, "loss": 0.608, "step": 12910 }, { "epoch": 0.83, "grad_norm": 1.5059235106955504, "learning_rate": 7.696516940788701e-07, "loss": 0.5947, "step": 12911 }, { "epoch": 0.83, "grad_norm": 1.7191057388486046, "learning_rate": 7.690992258711855e-07, "loss": 0.7603, "step": 12912 }, { "epoch": 0.83, "grad_norm": 1.5658479224018824, "learning_rate": 7.685469394987271e-07, "loss": 0.6211, "step": 12913 }, { "epoch": 0.83, "grad_norm": 1.3300904217525427, "learning_rate": 7.679948349852301e-07, "loss": 0.7447, "step": 12914 }, { "epoch": 0.83, "grad_norm": 1.5115803163068942, "learning_rate": 7.67442912354422e-07, "loss": 0.6476, "step": 12915 }, { "epoch": 0.83, "grad_norm": 1.2766100369761069, "learning_rate": 7.668911716300237e-07, "loss": 0.6133, "step": 12916 }, { "epoch": 0.83, "grad_norm": 1.745468113025497, "learning_rate": 7.663396128357481e-07, "loss": 0.749, "step": 12917 }, { "epoch": 0.83, "grad_norm": 1.426041479479058, "learning_rate": 7.657882359952995e-07, "loss": 0.7586, "step": 12918 }, { "epoch": 0.83, "grad_norm": 1.47289585179571, "learning_rate": 7.652370411323745e-07, "loss": 0.6691, "step": 12919 }, { "epoch": 0.83, "grad_norm": 1.7782837289800084, "learning_rate": 7.646860282706652e-07, "loss": 0.6479, "step": 12920 }, { "epoch": 0.83, "grad_norm": 1.663342350371355, "learning_rate": 7.641351974338478e-07, "loss": 0.6107, "step": 12921 }, { "epoch": 0.83, "grad_norm": 1.2726548689424855, "learning_rate": 7.635845486456006e-07, "loss": 0.5288, "step": 12922 }, { "epoch": 0.83, "grad_norm": 3.044933978133101, "learning_rate": 7.630340819295879e-07, "loss": 0.6602, "step": 12923 }, { "epoch": 0.83, "grad_norm": 1.4895288374356266, "learning_rate": 7.624837973094668e-07, "loss": 0.5166, "step": 12924 }, { "epoch": 0.83, "grad_norm": 1.509560776149315, "learning_rate": 7.619336948088879e-07, "loss": 0.6626, "step": 12925 }, { "epoch": 0.83, "grad_norm": 1.7588470400577987, "learning_rate": 7.613837744514918e-07, "loss": 0.6526, "step": 12926 }, { "epoch": 0.83, "grad_norm": 1.5138628224159818, "learning_rate": 7.608340362609174e-07, "loss": 0.694, "step": 12927 }, { "epoch": 0.83, "grad_norm": 1.473784859078345, "learning_rate": 7.602844802607862e-07, "loss": 0.6431, "step": 12928 }, { "epoch": 0.83, "grad_norm": 1.787231702567111, "learning_rate": 7.597351064747211e-07, "loss": 0.7278, "step": 12929 }, { "epoch": 0.83, "grad_norm": 1.9638555220861866, "learning_rate": 7.591859149263287e-07, "loss": 0.6513, "step": 12930 }, { "epoch": 0.83, "grad_norm": 1.036944569277715, "learning_rate": 7.586369056392162e-07, "loss": 0.6347, "step": 12931 }, { "epoch": 0.83, "grad_norm": 1.4008832309316874, "learning_rate": 7.580880786369766e-07, "loss": 0.6195, "step": 12932 }, { "epoch": 0.83, "grad_norm": 1.4685764963254322, "learning_rate": 7.575394339431969e-07, "loss": 0.6273, "step": 12933 }, { "epoch": 0.83, "grad_norm": 1.7468125610763525, "learning_rate": 7.569909715814605e-07, "loss": 0.6644, "step": 12934 }, { "epoch": 0.83, "grad_norm": 1.5548832872901344, "learning_rate": 7.564426915753331e-07, "loss": 0.644, "step": 12935 }, { "epoch": 0.83, "grad_norm": 1.6160854447138013, "learning_rate": 7.558945939483847e-07, "loss": 0.7422, "step": 12936 }, { "epoch": 0.83, "grad_norm": 1.4835367084515356, "learning_rate": 7.553466787241665e-07, "loss": 0.6814, "step": 12937 }, { "epoch": 0.83, "grad_norm": 1.6265672804105151, "learning_rate": 7.547989459262295e-07, "loss": 0.662, "step": 12938 }, { "epoch": 0.83, "grad_norm": 1.520556592202545, "learning_rate": 7.542513955781139e-07, "loss": 0.6063, "step": 12939 }, { "epoch": 0.83, "grad_norm": 1.5857326354665064, "learning_rate": 7.537040277033514e-07, "loss": 0.6298, "step": 12940 }, { "epoch": 0.83, "grad_norm": 1.1225760497749018, "learning_rate": 7.53156842325467e-07, "loss": 0.6962, "step": 12941 }, { "epoch": 0.83, "grad_norm": 1.9055706544894375, "learning_rate": 7.52609839467977e-07, "loss": 0.696, "step": 12942 }, { "epoch": 0.83, "grad_norm": 1.1788918399496815, "learning_rate": 7.520630191543932e-07, "loss": 0.6844, "step": 12943 }, { "epoch": 0.83, "grad_norm": 1.5285524734913456, "learning_rate": 7.515163814082121e-07, "loss": 0.6094, "step": 12944 }, { "epoch": 0.83, "grad_norm": 1.5895771447407587, "learning_rate": 7.509699262529308e-07, "loss": 0.6576, "step": 12945 }, { "epoch": 0.83, "grad_norm": 1.8514279476453726, "learning_rate": 7.504236537120341e-07, "loss": 0.6693, "step": 12946 }, { "epoch": 0.83, "grad_norm": 1.7767634713264222, "learning_rate": 7.498775638089989e-07, "loss": 0.7303, "step": 12947 }, { "epoch": 0.83, "grad_norm": 1.6229784338375008, "learning_rate": 7.493316565672948e-07, "loss": 0.6249, "step": 12948 }, { "epoch": 0.83, "grad_norm": 1.672457084098765, "learning_rate": 7.487859320103847e-07, "loss": 0.6607, "step": 12949 }, { "epoch": 0.83, "grad_norm": 1.6171024453856908, "learning_rate": 7.482403901617225e-07, "loss": 0.6339, "step": 12950 }, { "epoch": 0.83, "grad_norm": 1.5752543617519892, "learning_rate": 7.476950310447523e-07, "loss": 0.6181, "step": 12951 }, { "epoch": 0.83, "grad_norm": 2.040344774331973, "learning_rate": 7.471498546829159e-07, "loss": 0.6978, "step": 12952 }, { "epoch": 0.83, "grad_norm": 1.8320268089191165, "learning_rate": 7.466048610996423e-07, "loss": 0.718, "step": 12953 }, { "epoch": 0.83, "grad_norm": 1.0839906402864876, "learning_rate": 7.46060050318354e-07, "loss": 0.7061, "step": 12954 }, { "epoch": 0.83, "grad_norm": 1.4865495738333274, "learning_rate": 7.455154223624661e-07, "loss": 0.7367, "step": 12955 }, { "epoch": 0.83, "grad_norm": 2.7374650852641498, "learning_rate": 7.449709772553853e-07, "loss": 0.6556, "step": 12956 }, { "epoch": 0.83, "grad_norm": 1.631131655233882, "learning_rate": 7.444267150205108e-07, "loss": 0.6522, "step": 12957 }, { "epoch": 0.83, "grad_norm": 1.7021072710566736, "learning_rate": 7.438826356812345e-07, "loss": 0.6589, "step": 12958 }, { "epoch": 0.83, "grad_norm": 1.405787199765223, "learning_rate": 7.433387392609387e-07, "loss": 0.7319, "step": 12959 }, { "epoch": 0.83, "grad_norm": 1.6474872180490345, "learning_rate": 7.42795025782998e-07, "loss": 0.6667, "step": 12960 }, { "epoch": 0.83, "grad_norm": 1.4778503555790317, "learning_rate": 7.422514952707832e-07, "loss": 0.5695, "step": 12961 }, { "epoch": 0.83, "grad_norm": 1.502979215159305, "learning_rate": 7.417081477476523e-07, "loss": 0.6583, "step": 12962 }, { "epoch": 0.83, "grad_norm": 1.4974844636675122, "learning_rate": 7.411649832369566e-07, "loss": 0.6093, "step": 12963 }, { "epoch": 0.83, "grad_norm": 1.6019313046965242, "learning_rate": 7.406220017620414e-07, "loss": 0.7454, "step": 12964 }, { "epoch": 0.83, "grad_norm": 1.1033055918621086, "learning_rate": 7.400792033462428e-07, "loss": 0.6268, "step": 12965 }, { "epoch": 0.83, "grad_norm": 1.6537455822678466, "learning_rate": 7.39536588012888e-07, "loss": 0.6865, "step": 12966 }, { "epoch": 0.83, "grad_norm": 1.5462445473013269, "learning_rate": 7.389941557852987e-07, "loss": 0.7351, "step": 12967 }, { "epoch": 0.83, "grad_norm": 1.5921278279150404, "learning_rate": 7.384519066867851e-07, "loss": 0.6577, "step": 12968 }, { "epoch": 0.83, "grad_norm": 1.382706497675559, "learning_rate": 7.379098407406554e-07, "loss": 0.7399, "step": 12969 }, { "epoch": 0.83, "grad_norm": 1.5176245153505572, "learning_rate": 7.373679579702053e-07, "loss": 0.7018, "step": 12970 }, { "epoch": 0.83, "grad_norm": 1.6121812676677278, "learning_rate": 7.368262583987229e-07, "loss": 0.5491, "step": 12971 }, { "epoch": 0.83, "grad_norm": 1.8230404097843393, "learning_rate": 7.362847420494896e-07, "loss": 0.68, "step": 12972 }, { "epoch": 0.83, "grad_norm": 1.5326715539235158, "learning_rate": 7.357434089457788e-07, "loss": 0.6932, "step": 12973 }, { "epoch": 0.83, "grad_norm": 1.4752975461925735, "learning_rate": 7.35202259110856e-07, "loss": 0.6731, "step": 12974 }, { "epoch": 0.83, "grad_norm": 1.6118170062599804, "learning_rate": 7.346612925679774e-07, "loss": 0.6438, "step": 12975 }, { "epoch": 0.83, "grad_norm": 1.064889740418157, "learning_rate": 7.341205093403963e-07, "loss": 0.7517, "step": 12976 }, { "epoch": 0.83, "grad_norm": 2.0327933836217706, "learning_rate": 7.33579909451349e-07, "loss": 0.6651, "step": 12977 }, { "epoch": 0.83, "grad_norm": 1.559899966818161, "learning_rate": 7.330394929240736e-07, "loss": 0.6351, "step": 12978 }, { "epoch": 0.83, "grad_norm": 1.4073089882561876, "learning_rate": 7.324992597817948e-07, "loss": 0.673, "step": 12979 }, { "epoch": 0.83, "grad_norm": 1.4431155345817495, "learning_rate": 7.319592100477307e-07, "loss": 0.5631, "step": 12980 }, { "epoch": 0.83, "grad_norm": 1.6035965609899836, "learning_rate": 7.314193437450911e-07, "loss": 0.6878, "step": 12981 }, { "epoch": 0.83, "grad_norm": 1.527625529524839, "learning_rate": 7.308796608970775e-07, "loss": 0.6566, "step": 12982 }, { "epoch": 0.83, "grad_norm": 2.4054911420833696, "learning_rate": 7.30340161526888e-07, "loss": 0.638, "step": 12983 }, { "epoch": 0.83, "grad_norm": 1.6295617549623886, "learning_rate": 7.298008456577038e-07, "loss": 0.6657, "step": 12984 }, { "epoch": 0.83, "grad_norm": 1.6032852416864292, "learning_rate": 7.292617133127083e-07, "loss": 0.677, "step": 12985 }, { "epoch": 0.83, "grad_norm": 1.6684515960885473, "learning_rate": 7.287227645150686e-07, "loss": 0.7443, "step": 12986 }, { "epoch": 0.83, "grad_norm": 1.5710503255655766, "learning_rate": 7.281839992879503e-07, "loss": 0.7107, "step": 12987 }, { "epoch": 0.83, "grad_norm": 1.9369826333145759, "learning_rate": 7.276454176545078e-07, "loss": 0.6567, "step": 12988 }, { "epoch": 0.83, "grad_norm": 1.4226804547927452, "learning_rate": 7.271070196378859e-07, "loss": 0.6294, "step": 12989 }, { "epoch": 0.83, "grad_norm": 1.5497626330496803, "learning_rate": 7.265688052612285e-07, "loss": 0.7181, "step": 12990 }, { "epoch": 0.83, "grad_norm": 1.5879211932284465, "learning_rate": 7.260307745476619e-07, "loss": 0.5779, "step": 12991 }, { "epoch": 0.83, "grad_norm": 1.6152172048120104, "learning_rate": 7.254929275203138e-07, "loss": 0.689, "step": 12992 }, { "epoch": 0.83, "grad_norm": 1.596094018639574, "learning_rate": 7.249552642022956e-07, "loss": 0.6822, "step": 12993 }, { "epoch": 0.83, "grad_norm": 1.610973758937844, "learning_rate": 7.244177846167177e-07, "loss": 0.704, "step": 12994 }, { "epoch": 0.83, "grad_norm": 1.5359262302056333, "learning_rate": 7.238804887866796e-07, "loss": 0.6177, "step": 12995 }, { "epoch": 0.83, "grad_norm": 1.6818442365172928, "learning_rate": 7.233433767352727e-07, "loss": 0.7502, "step": 12996 }, { "epoch": 0.83, "grad_norm": 1.5692952155981532, "learning_rate": 7.228064484855807e-07, "loss": 0.6669, "step": 12997 }, { "epoch": 0.83, "grad_norm": 1.5250390099890065, "learning_rate": 7.222697040606791e-07, "loss": 0.7597, "step": 12998 }, { "epoch": 0.83, "grad_norm": 1.5981848555049494, "learning_rate": 7.217331434836395e-07, "loss": 0.6685, "step": 12999 }, { "epoch": 0.83, "grad_norm": 1.3576115362010122, "learning_rate": 7.211967667775166e-07, "loss": 0.5986, "step": 13000 }, { "epoch": 0.83, "grad_norm": 1.8359974515094453, "learning_rate": 7.206605739653683e-07, "loss": 0.6962, "step": 13001 }, { "epoch": 0.83, "grad_norm": 1.5330217826521668, "learning_rate": 7.201245650702338e-07, "loss": 0.6135, "step": 13002 }, { "epoch": 0.83, "grad_norm": 1.3826392604944653, "learning_rate": 7.195887401151536e-07, "loss": 0.6498, "step": 13003 }, { "epoch": 0.83, "grad_norm": 1.1108156042885566, "learning_rate": 7.190530991231548e-07, "loss": 0.634, "step": 13004 }, { "epoch": 0.83, "grad_norm": 1.615743232750441, "learning_rate": 7.185176421172573e-07, "loss": 0.742, "step": 13005 }, { "epoch": 0.83, "grad_norm": 1.3952779340684887, "learning_rate": 7.179823691204768e-07, "loss": 0.6969, "step": 13006 }, { "epoch": 0.83, "grad_norm": 1.067691449212686, "learning_rate": 7.174472801558147e-07, "loss": 0.6572, "step": 13007 }, { "epoch": 0.83, "grad_norm": 1.4348832543665697, "learning_rate": 7.169123752462714e-07, "loss": 0.6016, "step": 13008 }, { "epoch": 0.83, "grad_norm": 1.6749522473768423, "learning_rate": 7.163776544148321e-07, "loss": 0.8008, "step": 13009 }, { "epoch": 0.83, "grad_norm": 1.3878027616112845, "learning_rate": 7.158431176844815e-07, "loss": 0.6686, "step": 13010 }, { "epoch": 0.83, "grad_norm": 1.8754754757132064, "learning_rate": 7.153087650781909e-07, "loss": 0.6349, "step": 13011 }, { "epoch": 0.83, "grad_norm": 1.5357627199470483, "learning_rate": 7.147745966189267e-07, "loss": 0.6375, "step": 13012 }, { "epoch": 0.83, "grad_norm": 1.5885306062750484, "learning_rate": 7.142406123296452e-07, "loss": 0.6181, "step": 13013 }, { "epoch": 0.83, "grad_norm": 1.483533917841398, "learning_rate": 7.137068122332974e-07, "loss": 0.5612, "step": 13014 }, { "epoch": 0.83, "grad_norm": 1.5741719846546214, "learning_rate": 7.131731963528232e-07, "loss": 0.7508, "step": 13015 }, { "epoch": 0.83, "grad_norm": 1.5050198958000183, "learning_rate": 7.126397647111566e-07, "loss": 0.7552, "step": 13016 }, { "epoch": 0.83, "grad_norm": 1.5822364452348743, "learning_rate": 7.121065173312253e-07, "loss": 0.6515, "step": 13017 }, { "epoch": 0.83, "grad_norm": 1.528510007270604, "learning_rate": 7.115734542359454e-07, "loss": 0.6217, "step": 13018 }, { "epoch": 0.83, "grad_norm": 1.2252383047009778, "learning_rate": 7.110405754482269e-07, "loss": 0.62, "step": 13019 }, { "epoch": 0.83, "grad_norm": 1.7318394207903596, "learning_rate": 7.105078809909727e-07, "loss": 0.7118, "step": 13020 }, { "epoch": 0.83, "grad_norm": 1.8829310163206778, "learning_rate": 7.09975370887076e-07, "loss": 0.6449, "step": 13021 }, { "epoch": 0.83, "grad_norm": 1.2452359012449927, "learning_rate": 7.09443045159423e-07, "loss": 0.629, "step": 13022 }, { "epoch": 0.83, "grad_norm": 1.3607231837999219, "learning_rate": 7.089109038308928e-07, "loss": 0.6393, "step": 13023 }, { "epoch": 0.83, "grad_norm": 1.017523823929408, "learning_rate": 7.083789469243535e-07, "loss": 0.6299, "step": 13024 }, { "epoch": 0.83, "grad_norm": 1.0833182219014168, "learning_rate": 7.078471744626708e-07, "loss": 0.6359, "step": 13025 }, { "epoch": 0.83, "grad_norm": 1.3739480427385367, "learning_rate": 7.07315586468697e-07, "loss": 0.6059, "step": 13026 }, { "epoch": 0.83, "grad_norm": 1.9456924839996035, "learning_rate": 7.067841829652794e-07, "loss": 0.6634, "step": 13027 }, { "epoch": 0.83, "grad_norm": 1.6493974335057289, "learning_rate": 7.062529639752558e-07, "loss": 0.7, "step": 13028 }, { "epoch": 0.83, "grad_norm": 1.7150332061696814, "learning_rate": 7.057219295214579e-07, "loss": 0.6435, "step": 13029 }, { "epoch": 0.83, "grad_norm": 2.127080986948024, "learning_rate": 7.051910796267081e-07, "loss": 0.7212, "step": 13030 }, { "epoch": 0.83, "grad_norm": 1.4459105963504233, "learning_rate": 7.046604143138198e-07, "loss": 0.6323, "step": 13031 }, { "epoch": 0.83, "grad_norm": 1.7457037602110916, "learning_rate": 7.041299336056028e-07, "loss": 0.665, "step": 13032 }, { "epoch": 0.83, "grad_norm": 1.5267964381824624, "learning_rate": 7.035996375248527e-07, "loss": 0.649, "step": 13033 }, { "epoch": 0.83, "grad_norm": 1.6724952849277221, "learning_rate": 7.030695260943637e-07, "loss": 0.7321, "step": 13034 }, { "epoch": 0.83, "grad_norm": 1.510504414226827, "learning_rate": 7.025395993369166e-07, "loss": 0.5886, "step": 13035 }, { "epoch": 0.83, "grad_norm": 1.693587356268006, "learning_rate": 7.020098572752876e-07, "loss": 0.6053, "step": 13036 }, { "epoch": 0.83, "grad_norm": 1.4827072427389316, "learning_rate": 7.01480299932244e-07, "loss": 0.6388, "step": 13037 }, { "epoch": 0.83, "grad_norm": 1.5554412869700487, "learning_rate": 7.009509273305442e-07, "loss": 0.6415, "step": 13038 }, { "epoch": 0.83, "grad_norm": 1.568783998800523, "learning_rate": 7.004217394929402e-07, "loss": 0.617, "step": 13039 }, { "epoch": 0.83, "grad_norm": 1.8055657803200984, "learning_rate": 6.998927364421737e-07, "loss": 0.6934, "step": 13040 }, { "epoch": 0.83, "grad_norm": 1.4756276299528266, "learning_rate": 6.993639182009843e-07, "loss": 0.6199, "step": 13041 }, { "epoch": 0.83, "grad_norm": 1.499089789008507, "learning_rate": 6.988352847920943e-07, "loss": 0.5609, "step": 13042 }, { "epoch": 0.83, "grad_norm": 1.776205864402136, "learning_rate": 6.983068362382272e-07, "loss": 0.7397, "step": 13043 }, { "epoch": 0.83, "grad_norm": 1.7131170833033114, "learning_rate": 6.977785725620928e-07, "loss": 0.7362, "step": 13044 }, { "epoch": 0.83, "grad_norm": 1.7935135022382236, "learning_rate": 6.972504937863955e-07, "loss": 0.675, "step": 13045 }, { "epoch": 0.84, "grad_norm": 1.744356962384265, "learning_rate": 6.967225999338306e-07, "loss": 0.5982, "step": 13046 }, { "epoch": 0.84, "grad_norm": 2.5553188231294652, "learning_rate": 6.961948910270844e-07, "loss": 0.8091, "step": 13047 }, { "epoch": 0.84, "grad_norm": 1.6305166803879472, "learning_rate": 6.956673670888409e-07, "loss": 0.6915, "step": 13048 }, { "epoch": 0.84, "grad_norm": 1.5535118831983838, "learning_rate": 6.951400281417669e-07, "loss": 0.7727, "step": 13049 }, { "epoch": 0.84, "grad_norm": 1.7373381812482747, "learning_rate": 6.946128742085311e-07, "loss": 0.7314, "step": 13050 }, { "epoch": 0.84, "grad_norm": 1.749672981221791, "learning_rate": 6.940859053117843e-07, "loss": 0.6748, "step": 13051 }, { "epoch": 0.84, "grad_norm": 1.4825692202450282, "learning_rate": 6.935591214741794e-07, "loss": 0.6675, "step": 13052 }, { "epoch": 0.84, "grad_norm": 1.0542417297658087, "learning_rate": 6.930325227183537e-07, "loss": 0.7466, "step": 13053 }, { "epoch": 0.84, "grad_norm": 1.4527207306333951, "learning_rate": 6.925061090669389e-07, "loss": 0.6405, "step": 13054 }, { "epoch": 0.84, "grad_norm": 1.5524641434510047, "learning_rate": 6.919798805425626e-07, "loss": 0.6352, "step": 13055 }, { "epoch": 0.84, "grad_norm": 1.4872817243031937, "learning_rate": 6.914538371678364e-07, "loss": 0.5779, "step": 13056 }, { "epoch": 0.84, "grad_norm": 1.5346008096587453, "learning_rate": 6.909279789653734e-07, "loss": 0.6603, "step": 13057 }, { "epoch": 0.84, "grad_norm": 1.0241329517079794, "learning_rate": 6.904023059577686e-07, "loss": 0.6442, "step": 13058 }, { "epoch": 0.84, "grad_norm": 1.8164999868813483, "learning_rate": 6.89876818167618e-07, "loss": 0.7532, "step": 13059 }, { "epoch": 0.84, "grad_norm": 1.7088140801565934, "learning_rate": 6.893515156175051e-07, "loss": 0.5949, "step": 13060 }, { "epoch": 0.84, "grad_norm": 1.2421742191140528, "learning_rate": 6.888263983300048e-07, "loss": 0.6993, "step": 13061 }, { "epoch": 0.84, "grad_norm": 1.6728967475279188, "learning_rate": 6.883014663276894e-07, "loss": 0.578, "step": 13062 }, { "epoch": 0.84, "grad_norm": 1.7586025088732138, "learning_rate": 6.877767196331147e-07, "loss": 0.6156, "step": 13063 }, { "epoch": 0.84, "grad_norm": 1.760407610174326, "learning_rate": 6.872521582688374e-07, "loss": 0.707, "step": 13064 }, { "epoch": 0.84, "grad_norm": 1.4867005561068014, "learning_rate": 6.867277822573975e-07, "loss": 0.6512, "step": 13065 }, { "epoch": 0.84, "grad_norm": 1.21610831360762, "learning_rate": 6.862035916213361e-07, "loss": 0.6442, "step": 13066 }, { "epoch": 0.84, "grad_norm": 1.8049251866705034, "learning_rate": 6.856795863831789e-07, "loss": 0.5902, "step": 13067 }, { "epoch": 0.84, "grad_norm": 1.470799955412531, "learning_rate": 6.851557665654479e-07, "loss": 0.6275, "step": 13068 }, { "epoch": 0.84, "grad_norm": 1.6235643700780202, "learning_rate": 6.846321321906551e-07, "loss": 0.664, "step": 13069 }, { "epoch": 0.84, "grad_norm": 1.4374835135170085, "learning_rate": 6.841086832813043e-07, "loss": 0.566, "step": 13070 }, { "epoch": 0.84, "grad_norm": 1.6254912281786218, "learning_rate": 6.835854198598957e-07, "loss": 0.6799, "step": 13071 }, { "epoch": 0.84, "grad_norm": 1.6342210633301346, "learning_rate": 6.830623419489135e-07, "loss": 0.6716, "step": 13072 }, { "epoch": 0.84, "grad_norm": 1.6467646030204661, "learning_rate": 6.825394495708415e-07, "loss": 0.7379, "step": 13073 }, { "epoch": 0.84, "grad_norm": 1.5714888495530526, "learning_rate": 6.820167427481522e-07, "loss": 0.651, "step": 13074 }, { "epoch": 0.84, "grad_norm": 1.579427193321227, "learning_rate": 6.814942215033099e-07, "loss": 0.6558, "step": 13075 }, { "epoch": 0.84, "grad_norm": 1.750984227131296, "learning_rate": 6.80971885858771e-07, "loss": 0.7517, "step": 13076 }, { "epoch": 0.84, "grad_norm": 1.681739633147709, "learning_rate": 6.804497358369855e-07, "loss": 0.6417, "step": 13077 }, { "epoch": 0.84, "grad_norm": 1.7608595770672448, "learning_rate": 6.799277714603935e-07, "loss": 0.643, "step": 13078 }, { "epoch": 0.84, "grad_norm": 1.690292438512689, "learning_rate": 6.794059927514268e-07, "loss": 0.61, "step": 13079 }, { "epoch": 0.84, "grad_norm": 1.6058855964115155, "learning_rate": 6.788843997325145e-07, "loss": 0.6468, "step": 13080 }, { "epoch": 0.84, "grad_norm": 1.7030799786231074, "learning_rate": 6.783629924260682e-07, "loss": 0.6802, "step": 13081 }, { "epoch": 0.84, "grad_norm": 1.5803807995949597, "learning_rate": 6.778417708545004e-07, "loss": 0.6471, "step": 13082 }, { "epoch": 0.84, "grad_norm": 1.5199427749807135, "learning_rate": 6.773207350402117e-07, "loss": 0.4978, "step": 13083 }, { "epoch": 0.84, "grad_norm": 1.5432181962257678, "learning_rate": 6.767998850055946e-07, "loss": 0.7002, "step": 13084 }, { "epoch": 0.84, "grad_norm": 1.7163752905868914, "learning_rate": 6.762792207730334e-07, "loss": 0.6824, "step": 13085 }, { "epoch": 0.84, "grad_norm": 1.6056499551646148, "learning_rate": 6.757587423649065e-07, "loss": 0.6767, "step": 13086 }, { "epoch": 0.84, "grad_norm": 1.6491659733706392, "learning_rate": 6.752384498035824e-07, "loss": 0.6819, "step": 13087 }, { "epoch": 0.84, "grad_norm": 1.413170626642955, "learning_rate": 6.747183431114218e-07, "loss": 0.618, "step": 13088 }, { "epoch": 0.84, "grad_norm": 1.3774420844997999, "learning_rate": 6.741984223107773e-07, "loss": 0.7295, "step": 13089 }, { "epoch": 0.84, "grad_norm": 1.7599470637709287, "learning_rate": 6.736786874239959e-07, "loss": 0.6231, "step": 13090 }, { "epoch": 0.84, "grad_norm": 1.6694656662327987, "learning_rate": 6.731591384734138e-07, "loss": 0.657, "step": 13091 }, { "epoch": 0.84, "grad_norm": 1.0178453518597574, "learning_rate": 6.726397754813596e-07, "loss": 0.6738, "step": 13092 }, { "epoch": 0.84, "grad_norm": 1.4743645600411404, "learning_rate": 6.721205984701551e-07, "loss": 0.6337, "step": 13093 }, { "epoch": 0.84, "grad_norm": 1.5390926296577208, "learning_rate": 6.716016074621135e-07, "loss": 0.7058, "step": 13094 }, { "epoch": 0.84, "grad_norm": 1.596152202684797, "learning_rate": 6.71082802479539e-07, "loss": 0.6878, "step": 13095 }, { "epoch": 0.84, "grad_norm": 1.8333816839295765, "learning_rate": 6.705641835447286e-07, "loss": 0.6312, "step": 13096 }, { "epoch": 0.84, "grad_norm": 1.720060990137022, "learning_rate": 6.70045750679974e-07, "loss": 0.7088, "step": 13097 }, { "epoch": 0.84, "grad_norm": 1.7028224793935285, "learning_rate": 6.695275039075527e-07, "loss": 0.7069, "step": 13098 }, { "epoch": 0.84, "grad_norm": 1.531321748787951, "learning_rate": 6.690094432497407e-07, "loss": 0.5922, "step": 13099 }, { "epoch": 0.84, "grad_norm": 1.6058629745755588, "learning_rate": 6.684915687288023e-07, "loss": 0.6699, "step": 13100 }, { "epoch": 0.84, "grad_norm": 1.5632310530648992, "learning_rate": 6.679738803669944e-07, "loss": 0.5999, "step": 13101 }, { "epoch": 0.84, "grad_norm": 1.8219384179253533, "learning_rate": 6.674563781865662e-07, "loss": 0.6233, "step": 13102 }, { "epoch": 0.84, "grad_norm": 1.4078901375214685, "learning_rate": 6.669390622097577e-07, "loss": 0.5828, "step": 13103 }, { "epoch": 0.84, "grad_norm": 1.6509849469929645, "learning_rate": 6.664219324588056e-07, "loss": 0.6449, "step": 13104 }, { "epoch": 0.84, "grad_norm": 1.7027384896826403, "learning_rate": 6.65904988955931e-07, "loss": 0.7014, "step": 13105 }, { "epoch": 0.84, "grad_norm": 1.4744035551444292, "learning_rate": 6.653882317233546e-07, "loss": 0.6111, "step": 13106 }, { "epoch": 0.84, "grad_norm": 1.9986359633500994, "learning_rate": 6.648716607832811e-07, "loss": 0.7501, "step": 13107 }, { "epoch": 0.84, "grad_norm": 1.6429652526217284, "learning_rate": 6.643552761579159e-07, "loss": 0.7039, "step": 13108 }, { "epoch": 0.84, "grad_norm": 1.8843636633206018, "learning_rate": 6.638390778694504e-07, "loss": 0.6673, "step": 13109 }, { "epoch": 0.84, "grad_norm": 1.5425099879143773, "learning_rate": 6.633230659400697e-07, "loss": 0.6329, "step": 13110 }, { "epoch": 0.84, "grad_norm": 1.0193323249124007, "learning_rate": 6.628072403919511e-07, "loss": 0.5809, "step": 13111 }, { "epoch": 0.84, "grad_norm": 1.462985300637387, "learning_rate": 6.62291601247263e-07, "loss": 0.5897, "step": 13112 }, { "epoch": 0.84, "grad_norm": 1.587104514094026, "learning_rate": 6.617761485281687e-07, "loss": 0.6156, "step": 13113 }, { "epoch": 0.84, "grad_norm": 1.4476997077152527, "learning_rate": 6.612608822568173e-07, "loss": 0.6482, "step": 13114 }, { "epoch": 0.84, "grad_norm": 1.5209304949047526, "learning_rate": 6.607458024553576e-07, "loss": 0.6239, "step": 13115 }, { "epoch": 0.84, "grad_norm": 1.5080045573049552, "learning_rate": 6.602309091459253e-07, "loss": 0.5857, "step": 13116 }, { "epoch": 0.84, "grad_norm": 1.7636000498240951, "learning_rate": 6.597162023506492e-07, "loss": 0.6959, "step": 13117 }, { "epoch": 0.84, "grad_norm": 1.4652108239715789, "learning_rate": 6.592016820916508e-07, "loss": 0.5867, "step": 13118 }, { "epoch": 0.84, "grad_norm": 1.4592163813884953, "learning_rate": 6.586873483910416e-07, "loss": 0.6575, "step": 13119 }, { "epoch": 0.84, "grad_norm": 1.4696545373821481, "learning_rate": 6.581732012709303e-07, "loss": 0.5985, "step": 13120 }, { "epoch": 0.84, "grad_norm": 0.9836140282570268, "learning_rate": 6.576592407534088e-07, "loss": 0.6529, "step": 13121 }, { "epoch": 0.84, "grad_norm": 1.5848367394250753, "learning_rate": 6.571454668605715e-07, "loss": 0.6264, "step": 13122 }, { "epoch": 0.84, "grad_norm": 1.0472938297824412, "learning_rate": 6.566318796144933e-07, "loss": 0.6508, "step": 13123 }, { "epoch": 0.84, "grad_norm": 1.4660124767355946, "learning_rate": 6.561184790372522e-07, "loss": 0.6277, "step": 13124 }, { "epoch": 0.84, "grad_norm": 1.4868028321919742, "learning_rate": 6.556052651509104e-07, "loss": 0.7259, "step": 13125 }, { "epoch": 0.84, "grad_norm": 1.6315308826948343, "learning_rate": 6.550922379775248e-07, "loss": 0.7487, "step": 13126 }, { "epoch": 0.84, "grad_norm": 1.3415195981814538, "learning_rate": 6.545793975391468e-07, "loss": 0.6497, "step": 13127 }, { "epoch": 0.84, "grad_norm": 1.3536378202747792, "learning_rate": 6.54066743857813e-07, "loss": 0.6419, "step": 13128 }, { "epoch": 0.84, "grad_norm": 1.4603828609122236, "learning_rate": 6.535542769555609e-07, "loss": 0.6077, "step": 13129 }, { "epoch": 0.84, "grad_norm": 1.6707119196693503, "learning_rate": 6.53041996854411e-07, "loss": 0.5839, "step": 13130 }, { "epoch": 0.84, "grad_norm": 1.8315074538134037, "learning_rate": 6.525299035763827e-07, "loss": 0.6295, "step": 13131 }, { "epoch": 0.84, "grad_norm": 1.5676174885882617, "learning_rate": 6.520179971434837e-07, "loss": 0.6133, "step": 13132 }, { "epoch": 0.84, "grad_norm": 1.4547791287806828, "learning_rate": 6.515062775777148e-07, "loss": 0.6924, "step": 13133 }, { "epoch": 0.84, "grad_norm": 1.5738642925875008, "learning_rate": 6.50994744901069e-07, "loss": 0.6597, "step": 13134 }, { "epoch": 0.84, "grad_norm": 1.6404595707575762, "learning_rate": 6.504833991355292e-07, "loss": 0.5797, "step": 13135 }, { "epoch": 0.84, "grad_norm": 1.3263251585997502, "learning_rate": 6.499722403030751e-07, "loss": 0.6708, "step": 13136 }, { "epoch": 0.84, "grad_norm": 1.5712433735437414, "learning_rate": 6.494612684256718e-07, "loss": 0.7448, "step": 13137 }, { "epoch": 0.84, "grad_norm": 1.6951374943256323, "learning_rate": 6.489504835252824e-07, "loss": 0.6357, "step": 13138 }, { "epoch": 0.84, "grad_norm": 1.6653943935519286, "learning_rate": 6.484398856238582e-07, "loss": 0.7847, "step": 13139 }, { "epoch": 0.84, "grad_norm": 0.969727839565039, "learning_rate": 6.47929474743344e-07, "loss": 0.6652, "step": 13140 }, { "epoch": 0.84, "grad_norm": 1.6590848275339587, "learning_rate": 6.474192509056759e-07, "loss": 0.6262, "step": 13141 }, { "epoch": 0.84, "grad_norm": 1.6045610439515445, "learning_rate": 6.469092141327827e-07, "loss": 0.7781, "step": 13142 }, { "epoch": 0.84, "grad_norm": 1.5744227324714566, "learning_rate": 6.463993644465843e-07, "loss": 0.5729, "step": 13143 }, { "epoch": 0.84, "grad_norm": 1.473498328909522, "learning_rate": 6.458897018689919e-07, "loss": 0.6432, "step": 13144 }, { "epoch": 0.84, "grad_norm": 1.4444680976744926, "learning_rate": 6.453802264219117e-07, "loss": 0.5857, "step": 13145 }, { "epoch": 0.84, "grad_norm": 1.423674906438191, "learning_rate": 6.448709381272395e-07, "loss": 0.6726, "step": 13146 }, { "epoch": 0.84, "grad_norm": 1.5529762561561562, "learning_rate": 6.443618370068622e-07, "loss": 0.7138, "step": 13147 }, { "epoch": 0.84, "grad_norm": 1.5373722306869002, "learning_rate": 6.438529230826612e-07, "loss": 0.7228, "step": 13148 }, { "epoch": 0.84, "grad_norm": 1.525448746733225, "learning_rate": 6.43344196376508e-07, "loss": 0.6266, "step": 13149 }, { "epoch": 0.84, "grad_norm": 1.5506744067484701, "learning_rate": 6.428356569102667e-07, "loss": 0.6455, "step": 13150 }, { "epoch": 0.84, "grad_norm": 1.3058319555628777, "learning_rate": 6.423273047057932e-07, "loss": 0.5719, "step": 13151 }, { "epoch": 0.84, "grad_norm": 1.1068261122382614, "learning_rate": 6.418191397849355e-07, "loss": 0.6456, "step": 13152 }, { "epoch": 0.84, "grad_norm": 1.742086031130859, "learning_rate": 6.413111621695322e-07, "loss": 0.7024, "step": 13153 }, { "epoch": 0.84, "grad_norm": 1.4469241862038733, "learning_rate": 6.408033718814172e-07, "loss": 0.6345, "step": 13154 }, { "epoch": 0.84, "grad_norm": 0.9600288634785149, "learning_rate": 6.402957689424139e-07, "loss": 0.6525, "step": 13155 }, { "epoch": 0.84, "grad_norm": 1.5323583749025145, "learning_rate": 6.397883533743371e-07, "loss": 0.6571, "step": 13156 }, { "epoch": 0.84, "grad_norm": 1.415847622415529, "learning_rate": 6.392811251989944e-07, "loss": 0.6461, "step": 13157 }, { "epoch": 0.84, "grad_norm": 1.7531160054963613, "learning_rate": 6.387740844381863e-07, "loss": 0.5991, "step": 13158 }, { "epoch": 0.84, "grad_norm": 1.1095958019009384, "learning_rate": 6.382672311137039e-07, "loss": 0.6359, "step": 13159 }, { "epoch": 0.84, "grad_norm": 1.660857336327207, "learning_rate": 6.377605652473301e-07, "loss": 0.693, "step": 13160 }, { "epoch": 0.84, "grad_norm": 1.1171539555835053, "learning_rate": 6.372540868608401e-07, "loss": 0.6203, "step": 13161 }, { "epoch": 0.84, "grad_norm": 1.4642690900440836, "learning_rate": 6.367477959760043e-07, "loss": 0.6657, "step": 13162 }, { "epoch": 0.84, "grad_norm": 1.6119938785669312, "learning_rate": 6.362416926145775e-07, "loss": 0.7303, "step": 13163 }, { "epoch": 0.84, "grad_norm": 1.7704873013954614, "learning_rate": 6.357357767983147e-07, "loss": 0.6768, "step": 13164 }, { "epoch": 0.84, "grad_norm": 1.7114814258921758, "learning_rate": 6.352300485489571e-07, "loss": 0.628, "step": 13165 }, { "epoch": 0.84, "grad_norm": 1.5654169656208756, "learning_rate": 6.347245078882408e-07, "loss": 0.5533, "step": 13166 }, { "epoch": 0.84, "grad_norm": 1.8378004372190184, "learning_rate": 6.342191548378923e-07, "loss": 0.6993, "step": 13167 }, { "epoch": 0.84, "grad_norm": 1.5524737218808493, "learning_rate": 6.337139894196292e-07, "loss": 0.7104, "step": 13168 }, { "epoch": 0.84, "grad_norm": 1.5167923780795496, "learning_rate": 6.33209011655167e-07, "loss": 0.5784, "step": 13169 }, { "epoch": 0.84, "grad_norm": 1.491096789438562, "learning_rate": 6.327042215662027e-07, "loss": 0.6507, "step": 13170 }, { "epoch": 0.84, "grad_norm": 1.5103625777971716, "learning_rate": 6.321996191744368e-07, "loss": 0.6333, "step": 13171 }, { "epoch": 0.84, "grad_norm": 1.6033613614711961, "learning_rate": 6.316952045015506e-07, "loss": 0.6406, "step": 13172 }, { "epoch": 0.84, "grad_norm": 1.6339737104409744, "learning_rate": 6.311909775692265e-07, "loss": 0.6647, "step": 13173 }, { "epoch": 0.84, "grad_norm": 2.1518705535439167, "learning_rate": 6.306869383991343e-07, "loss": 0.6622, "step": 13174 }, { "epoch": 0.84, "grad_norm": 1.0517670616270574, "learning_rate": 6.301830870129349e-07, "loss": 0.6712, "step": 13175 }, { "epoch": 0.84, "grad_norm": 1.463529222826232, "learning_rate": 6.296794234322867e-07, "loss": 0.7165, "step": 13176 }, { "epoch": 0.84, "grad_norm": 1.6426614538114488, "learning_rate": 6.291759476788312e-07, "loss": 0.5949, "step": 13177 }, { "epoch": 0.84, "grad_norm": 1.6457325183493532, "learning_rate": 6.286726597742116e-07, "loss": 0.63, "step": 13178 }, { "epoch": 0.84, "grad_norm": 1.4507838553307681, "learning_rate": 6.281695597400533e-07, "loss": 0.6285, "step": 13179 }, { "epoch": 0.84, "grad_norm": 1.4569216739265523, "learning_rate": 6.276666475979815e-07, "loss": 0.6604, "step": 13180 }, { "epoch": 0.84, "grad_norm": 0.97219579221513, "learning_rate": 6.271639233696103e-07, "loss": 0.7209, "step": 13181 }, { "epoch": 0.84, "grad_norm": 1.7528395842337652, "learning_rate": 6.266613870765437e-07, "loss": 0.7005, "step": 13182 }, { "epoch": 0.84, "grad_norm": 1.5173330577829927, "learning_rate": 6.261590387403832e-07, "loss": 0.6862, "step": 13183 }, { "epoch": 0.84, "grad_norm": 1.6775175436903063, "learning_rate": 6.256568783827144e-07, "loss": 0.6154, "step": 13184 }, { "epoch": 0.84, "grad_norm": 1.6065295568169817, "learning_rate": 6.251549060251233e-07, "loss": 0.6939, "step": 13185 }, { "epoch": 0.84, "grad_norm": 1.52046797622794, "learning_rate": 6.246531216891794e-07, "loss": 0.6479, "step": 13186 }, { "epoch": 0.84, "grad_norm": 1.3596734654820744, "learning_rate": 6.241515253964515e-07, "loss": 0.6724, "step": 13187 }, { "epoch": 0.84, "grad_norm": 1.5558108913985873, "learning_rate": 6.236501171684961e-07, "loss": 0.685, "step": 13188 }, { "epoch": 0.84, "grad_norm": 1.5933548543359968, "learning_rate": 6.231488970268628e-07, "loss": 0.6642, "step": 13189 }, { "epoch": 0.84, "grad_norm": 1.1315033104024217, "learning_rate": 6.226478649930928e-07, "loss": 0.6633, "step": 13190 }, { "epoch": 0.84, "grad_norm": 1.6119574587214356, "learning_rate": 6.221470210887182e-07, "loss": 0.7785, "step": 13191 }, { "epoch": 0.84, "grad_norm": 1.4816422724934861, "learning_rate": 6.216463653352678e-07, "loss": 0.6256, "step": 13192 }, { "epoch": 0.84, "grad_norm": 1.5119589576610728, "learning_rate": 6.211458977542545e-07, "loss": 0.651, "step": 13193 }, { "epoch": 0.84, "grad_norm": 1.8476358574119645, "learning_rate": 6.20645618367191e-07, "loss": 0.6865, "step": 13194 }, { "epoch": 0.84, "grad_norm": 1.210118221258668, "learning_rate": 6.201455271955747e-07, "loss": 0.6283, "step": 13195 }, { "epoch": 0.84, "grad_norm": 1.5996099282280558, "learning_rate": 6.196456242609012e-07, "loss": 0.7113, "step": 13196 }, { "epoch": 0.84, "grad_norm": 4.356945545585673, "learning_rate": 6.191459095846547e-07, "loss": 0.6399, "step": 13197 }, { "epoch": 0.84, "grad_norm": 1.5653769443721033, "learning_rate": 6.186463831883111e-07, "loss": 0.6568, "step": 13198 }, { "epoch": 0.84, "grad_norm": 1.5441585415238372, "learning_rate": 6.181470450933397e-07, "loss": 0.635, "step": 13199 }, { "epoch": 0.84, "grad_norm": 1.6773963120934618, "learning_rate": 6.176478953212001e-07, "loss": 0.7268, "step": 13200 }, { "epoch": 0.84, "grad_norm": 2.142079922519163, "learning_rate": 6.171489338933467e-07, "loss": 0.6361, "step": 13201 }, { "epoch": 0.85, "grad_norm": 1.556696841247735, "learning_rate": 6.166501608312209e-07, "loss": 0.7814, "step": 13202 }, { "epoch": 0.85, "grad_norm": 1.482869221562247, "learning_rate": 6.161515761562614e-07, "loss": 0.5986, "step": 13203 }, { "epoch": 0.85, "grad_norm": 1.457803123785916, "learning_rate": 6.156531798898951e-07, "loss": 0.6222, "step": 13204 }, { "epoch": 0.85, "grad_norm": 1.5067283748138547, "learning_rate": 6.151549720535433e-07, "loss": 0.6411, "step": 13205 }, { "epoch": 0.85, "grad_norm": 1.235597191726828, "learning_rate": 6.146569526686158e-07, "loss": 0.6838, "step": 13206 }, { "epoch": 0.85, "grad_norm": 1.5997036105341762, "learning_rate": 6.141591217565185e-07, "loss": 0.6601, "step": 13207 }, { "epoch": 0.85, "grad_norm": 1.5167602556569444, "learning_rate": 6.136614793386459e-07, "loss": 0.6354, "step": 13208 }, { "epoch": 0.85, "grad_norm": 1.744572601780379, "learning_rate": 6.131640254363847e-07, "loss": 0.7976, "step": 13209 }, { "epoch": 0.85, "grad_norm": 1.4464067540855705, "learning_rate": 6.126667600711167e-07, "loss": 0.6441, "step": 13210 }, { "epoch": 0.85, "grad_norm": 1.6051154457957282, "learning_rate": 6.121696832642126e-07, "loss": 0.6638, "step": 13211 }, { "epoch": 0.85, "grad_norm": 1.832677533067668, "learning_rate": 6.116727950370355e-07, "loss": 0.6756, "step": 13212 }, { "epoch": 0.85, "grad_norm": 1.4868386760529435, "learning_rate": 6.111760954109402e-07, "loss": 0.6057, "step": 13213 }, { "epoch": 0.85, "grad_norm": 1.6906029366897584, "learning_rate": 6.106795844072744e-07, "loss": 0.7476, "step": 13214 }, { "epoch": 0.85, "grad_norm": 1.5953752557666534, "learning_rate": 6.101832620473763e-07, "loss": 0.594, "step": 13215 }, { "epoch": 0.85, "grad_norm": 2.409589843861981, "learning_rate": 6.09687128352578e-07, "loss": 0.5396, "step": 13216 }, { "epoch": 0.85, "grad_norm": 3.8022253532521426, "learning_rate": 6.091911833441999e-07, "loss": 0.6545, "step": 13217 }, { "epoch": 0.85, "grad_norm": 1.5575575931323336, "learning_rate": 6.086954270435602e-07, "loss": 0.7248, "step": 13218 }, { "epoch": 0.85, "grad_norm": 1.3024941172620028, "learning_rate": 6.081998594719629e-07, "loss": 0.6758, "step": 13219 }, { "epoch": 0.85, "grad_norm": 1.559876264713848, "learning_rate": 6.077044806507076e-07, "loss": 0.5637, "step": 13220 }, { "epoch": 0.85, "grad_norm": 1.5734628481965824, "learning_rate": 6.07209290601084e-07, "loss": 0.5974, "step": 13221 }, { "epoch": 0.85, "grad_norm": 1.3880204371497995, "learning_rate": 6.06714289344375e-07, "loss": 0.6018, "step": 13222 }, { "epoch": 0.85, "grad_norm": 1.4939735394283535, "learning_rate": 6.062194769018542e-07, "loss": 0.5981, "step": 13223 }, { "epoch": 0.85, "grad_norm": 1.6729034336367532, "learning_rate": 6.057248532947862e-07, "loss": 0.6595, "step": 13224 }, { "epoch": 0.85, "grad_norm": 1.5191021377550642, "learning_rate": 6.05230418544433e-07, "loss": 0.6222, "step": 13225 }, { "epoch": 0.85, "grad_norm": 1.5901533845547178, "learning_rate": 6.04736172672039e-07, "loss": 0.6543, "step": 13226 }, { "epoch": 0.85, "grad_norm": 1.4491664487252602, "learning_rate": 6.042421156988498e-07, "loss": 0.6385, "step": 13227 }, { "epoch": 0.85, "grad_norm": 1.2517129149584352, "learning_rate": 6.037482476460981e-07, "loss": 0.604, "step": 13228 }, { "epoch": 0.85, "grad_norm": 2.2270455260868913, "learning_rate": 6.032545685350088e-07, "loss": 0.7185, "step": 13229 }, { "epoch": 0.85, "grad_norm": 1.644242497267712, "learning_rate": 6.027610783867993e-07, "loss": 0.8129, "step": 13230 }, { "epoch": 0.85, "grad_norm": 1.102576135568934, "learning_rate": 6.022677772226781e-07, "loss": 0.6976, "step": 13231 }, { "epoch": 0.85, "grad_norm": 1.5165154825259188, "learning_rate": 6.017746650638479e-07, "loss": 0.6331, "step": 13232 }, { "epoch": 0.85, "grad_norm": 1.4880604254796614, "learning_rate": 6.012817419314992e-07, "loss": 0.5997, "step": 13233 }, { "epoch": 0.85, "grad_norm": 1.1961395421289995, "learning_rate": 6.007890078468204e-07, "loss": 0.6504, "step": 13234 }, { "epoch": 0.85, "grad_norm": 1.4989871287807943, "learning_rate": 6.002964628309838e-07, "loss": 0.697, "step": 13235 }, { "epoch": 0.85, "grad_norm": 1.086790895241557, "learning_rate": 5.998041069051624e-07, "loss": 0.5686, "step": 13236 }, { "epoch": 0.85, "grad_norm": 1.8038604755532743, "learning_rate": 5.993119400905123e-07, "loss": 0.646, "step": 13237 }, { "epoch": 0.85, "grad_norm": 1.210281108822425, "learning_rate": 5.988199624081887e-07, "loss": 0.526, "step": 13238 }, { "epoch": 0.85, "grad_norm": 1.6941820597395385, "learning_rate": 5.983281738793351e-07, "loss": 0.6351, "step": 13239 }, { "epoch": 0.85, "grad_norm": 1.5152645925946366, "learning_rate": 5.978365745250863e-07, "loss": 0.6617, "step": 13240 }, { "epoch": 0.85, "grad_norm": 1.5060418927484258, "learning_rate": 5.973451643665734e-07, "loss": 0.645, "step": 13241 }, { "epoch": 0.85, "grad_norm": 1.6244827036138634, "learning_rate": 5.968539434249121e-07, "loss": 0.7969, "step": 13242 }, { "epoch": 0.85, "grad_norm": 1.1719041333698448, "learning_rate": 5.963629117212183e-07, "loss": 0.6539, "step": 13243 }, { "epoch": 0.85, "grad_norm": 1.5422873349555384, "learning_rate": 5.958720692765913e-07, "loss": 0.6994, "step": 13244 }, { "epoch": 0.85, "grad_norm": 1.0334208206544553, "learning_rate": 5.95381416112129e-07, "loss": 0.5582, "step": 13245 }, { "epoch": 0.85, "grad_norm": 1.5381018660985208, "learning_rate": 5.948909522489182e-07, "loss": 0.7008, "step": 13246 }, { "epoch": 0.85, "grad_norm": 1.4218610181361393, "learning_rate": 5.944006777080363e-07, "loss": 0.7247, "step": 13247 }, { "epoch": 0.85, "grad_norm": 1.5076324677194846, "learning_rate": 5.939105925105587e-07, "loss": 0.7435, "step": 13248 }, { "epoch": 0.85, "grad_norm": 1.1071003841801639, "learning_rate": 5.934206966775429e-07, "loss": 0.6285, "step": 13249 }, { "epoch": 0.85, "grad_norm": 2.6224783409724806, "learning_rate": 5.929309902300484e-07, "loss": 0.6987, "step": 13250 }, { "epoch": 0.85, "grad_norm": 1.3915084930414205, "learning_rate": 5.924414731891171e-07, "loss": 0.633, "step": 13251 }, { "epoch": 0.85, "grad_norm": 1.5925759149771146, "learning_rate": 5.919521455757909e-07, "loss": 0.7278, "step": 13252 }, { "epoch": 0.85, "grad_norm": 1.5831262576194771, "learning_rate": 5.914630074110989e-07, "loss": 0.651, "step": 13253 }, { "epoch": 0.85, "grad_norm": 1.8113324581468664, "learning_rate": 5.909740587160629e-07, "loss": 0.7015, "step": 13254 }, { "epoch": 0.85, "grad_norm": 1.590803683076782, "learning_rate": 5.904852995116977e-07, "loss": 0.7201, "step": 13255 }, { "epoch": 0.85, "grad_norm": 1.8287161053909167, "learning_rate": 5.899967298190073e-07, "loss": 0.6591, "step": 13256 }, { "epoch": 0.85, "grad_norm": 1.6869389780233992, "learning_rate": 5.895083496589932e-07, "loss": 0.7026, "step": 13257 }, { "epoch": 0.85, "grad_norm": 1.60868528200116, "learning_rate": 5.890201590526401e-07, "loss": 0.7068, "step": 13258 }, { "epoch": 0.85, "grad_norm": 1.371822128758147, "learning_rate": 5.885321580209324e-07, "loss": 0.6479, "step": 13259 }, { "epoch": 0.85, "grad_norm": 1.4393695211094963, "learning_rate": 5.880443465848435e-07, "loss": 0.6448, "step": 13260 }, { "epoch": 0.85, "grad_norm": 1.8899774926168431, "learning_rate": 5.875567247653374e-07, "loss": 0.6168, "step": 13261 }, { "epoch": 0.85, "grad_norm": 1.76228418228212, "learning_rate": 5.870692925833721e-07, "loss": 0.6323, "step": 13262 }, { "epoch": 0.85, "grad_norm": 1.5475583705730043, "learning_rate": 5.865820500598951e-07, "loss": 0.6656, "step": 13263 }, { "epoch": 0.85, "grad_norm": 1.4665620206666692, "learning_rate": 5.860949972158481e-07, "loss": 0.583, "step": 13264 }, { "epoch": 0.85, "grad_norm": 1.5281799219956405, "learning_rate": 5.85608134072162e-07, "loss": 0.6654, "step": 13265 }, { "epoch": 0.85, "grad_norm": 1.4058404309498724, "learning_rate": 5.851214606497635e-07, "loss": 0.5743, "step": 13266 }, { "epoch": 0.85, "grad_norm": 1.9034696590787523, "learning_rate": 5.846349769695675e-07, "loss": 0.7117, "step": 13267 }, { "epoch": 0.85, "grad_norm": 1.9749560322729371, "learning_rate": 5.841486830524823e-07, "loss": 0.6751, "step": 13268 }, { "epoch": 0.85, "grad_norm": 1.530564109840117, "learning_rate": 5.836625789194078e-07, "loss": 0.6437, "step": 13269 }, { "epoch": 0.85, "grad_norm": 1.5851855547868798, "learning_rate": 5.831766645912357e-07, "loss": 0.6415, "step": 13270 }, { "epoch": 0.85, "grad_norm": 1.5914209705540368, "learning_rate": 5.826909400888492e-07, "loss": 0.6267, "step": 13271 }, { "epoch": 0.85, "grad_norm": 1.1242527870672594, "learning_rate": 5.822054054331244e-07, "loss": 0.6186, "step": 13272 }, { "epoch": 0.85, "grad_norm": 1.6619169051517317, "learning_rate": 5.81720060644928e-07, "loss": 0.697, "step": 13273 }, { "epoch": 0.85, "grad_norm": 1.4624788305474319, "learning_rate": 5.812349057451183e-07, "loss": 0.6696, "step": 13274 }, { "epoch": 0.85, "grad_norm": 1.4137621349611207, "learning_rate": 5.807499407545486e-07, "loss": 0.653, "step": 13275 }, { "epoch": 0.85, "grad_norm": 1.6855357087909588, "learning_rate": 5.8026516569406e-07, "loss": 0.6468, "step": 13276 }, { "epoch": 0.85, "grad_norm": 1.6759174820245513, "learning_rate": 5.797805805844875e-07, "loss": 0.6743, "step": 13277 }, { "epoch": 0.85, "grad_norm": 1.6208746904667504, "learning_rate": 5.792961854466572e-07, "loss": 0.6603, "step": 13278 }, { "epoch": 0.85, "grad_norm": 1.5046320245019469, "learning_rate": 5.788119803013881e-07, "loss": 0.6176, "step": 13279 }, { "epoch": 0.85, "grad_norm": 1.5303175838923195, "learning_rate": 5.783279651694895e-07, "loss": 0.6937, "step": 13280 }, { "epoch": 0.85, "grad_norm": 1.4981116668515726, "learning_rate": 5.778441400717644e-07, "loss": 0.6281, "step": 13281 }, { "epoch": 0.85, "grad_norm": 1.913331111181455, "learning_rate": 5.773605050290043e-07, "loss": 0.6611, "step": 13282 }, { "epoch": 0.85, "grad_norm": 1.7167212474726858, "learning_rate": 5.768770600619978e-07, "loss": 0.6111, "step": 13283 }, { "epoch": 0.85, "grad_norm": 1.424588743437712, "learning_rate": 5.763938051915208e-07, "loss": 0.6854, "step": 13284 }, { "epoch": 0.85, "grad_norm": 1.6977644342197287, "learning_rate": 5.759107404383429e-07, "loss": 0.5634, "step": 13285 }, { "epoch": 0.85, "grad_norm": 1.1736447281095777, "learning_rate": 5.754278658232249e-07, "loss": 0.7314, "step": 13286 }, { "epoch": 0.85, "grad_norm": 1.093077931670026, "learning_rate": 5.749451813669205e-07, "loss": 0.6378, "step": 13287 }, { "epoch": 0.85, "grad_norm": 1.6119667611642776, "learning_rate": 5.744626870901731e-07, "loss": 0.5895, "step": 13288 }, { "epoch": 0.85, "grad_norm": 1.1890855964494476, "learning_rate": 5.739803830137192e-07, "loss": 0.6432, "step": 13289 }, { "epoch": 0.85, "grad_norm": 1.6647782975518823, "learning_rate": 5.734982691582907e-07, "loss": 0.6034, "step": 13290 }, { "epoch": 0.85, "grad_norm": 1.6282199494948577, "learning_rate": 5.730163455446025e-07, "loss": 0.6724, "step": 13291 }, { "epoch": 0.85, "grad_norm": 1.7661016441950712, "learning_rate": 5.725346121933712e-07, "loss": 0.6518, "step": 13292 }, { "epoch": 0.85, "grad_norm": 1.7744544961352908, "learning_rate": 5.720530691252979e-07, "loss": 0.6635, "step": 13293 }, { "epoch": 0.85, "grad_norm": 1.8891803944218373, "learning_rate": 5.715717163610801e-07, "loss": 0.7788, "step": 13294 }, { "epoch": 0.85, "grad_norm": 1.5871232196581868, "learning_rate": 5.71090553921404e-07, "loss": 0.6768, "step": 13295 }, { "epoch": 0.85, "grad_norm": 1.5705189175540797, "learning_rate": 5.706095818269485e-07, "loss": 0.685, "step": 13296 }, { "epoch": 0.85, "grad_norm": 1.1589599074821622, "learning_rate": 5.701288000983884e-07, "loss": 0.7598, "step": 13297 }, { "epoch": 0.85, "grad_norm": 1.693582938059845, "learning_rate": 5.696482087563814e-07, "loss": 0.6461, "step": 13298 }, { "epoch": 0.85, "grad_norm": 1.5268078395796603, "learning_rate": 5.691678078215873e-07, "loss": 0.6248, "step": 13299 }, { "epoch": 0.85, "grad_norm": 1.3982137523784357, "learning_rate": 5.686875973146477e-07, "loss": 0.6843, "step": 13300 }, { "epoch": 0.85, "grad_norm": 1.6466828770085238, "learning_rate": 5.682075772562051e-07, "loss": 0.5655, "step": 13301 }, { "epoch": 0.85, "grad_norm": 1.0275940517157047, "learning_rate": 5.677277476668886e-07, "loss": 0.6714, "step": 13302 }, { "epoch": 0.85, "grad_norm": 2.049000559600766, "learning_rate": 5.672481085673199e-07, "loss": 0.5797, "step": 13303 }, { "epoch": 0.85, "grad_norm": 1.5006345140817046, "learning_rate": 5.66768659978113e-07, "loss": 0.6563, "step": 13304 }, { "epoch": 0.85, "grad_norm": 2.0004398927891605, "learning_rate": 5.662894019198722e-07, "loss": 0.6997, "step": 13305 }, { "epoch": 0.85, "grad_norm": 1.8788622503864465, "learning_rate": 5.658103344131988e-07, "loss": 0.6441, "step": 13306 }, { "epoch": 0.85, "grad_norm": 1.6398207472817767, "learning_rate": 5.653314574786778e-07, "loss": 0.7093, "step": 13307 }, { "epoch": 0.85, "grad_norm": 1.3689796243026455, "learning_rate": 5.648527711368939e-07, "loss": 0.7123, "step": 13308 }, { "epoch": 0.85, "grad_norm": 1.5994769585257453, "learning_rate": 5.643742754084164e-07, "loss": 0.6465, "step": 13309 }, { "epoch": 0.85, "grad_norm": 1.4160072071459358, "learning_rate": 5.63895970313813e-07, "loss": 0.6943, "step": 13310 }, { "epoch": 0.85, "grad_norm": 1.7274615863111409, "learning_rate": 5.634178558736397e-07, "loss": 0.6646, "step": 13311 }, { "epoch": 0.85, "grad_norm": 1.6991538571009546, "learning_rate": 5.629399321084428e-07, "loss": 0.7296, "step": 13312 }, { "epoch": 0.85, "grad_norm": 1.4740753999798952, "learning_rate": 5.624621990387669e-07, "loss": 0.681, "step": 13313 }, { "epoch": 0.85, "grad_norm": 1.942762964453949, "learning_rate": 5.619846566851384e-07, "loss": 0.6745, "step": 13314 }, { "epoch": 0.85, "grad_norm": 1.4822641531318035, "learning_rate": 5.615073050680859e-07, "loss": 0.7062, "step": 13315 }, { "epoch": 0.85, "grad_norm": 1.6183087960548124, "learning_rate": 5.610301442081212e-07, "loss": 0.7475, "step": 13316 }, { "epoch": 0.85, "grad_norm": 1.1166470788186564, "learning_rate": 5.605531741257536e-07, "loss": 0.6857, "step": 13317 }, { "epoch": 0.85, "grad_norm": 1.3220318724963602, "learning_rate": 5.600763948414828e-07, "loss": 0.5771, "step": 13318 }, { "epoch": 0.85, "grad_norm": 1.4822941102745335, "learning_rate": 5.595998063757985e-07, "loss": 0.6457, "step": 13319 }, { "epoch": 0.85, "grad_norm": 1.3904614552054184, "learning_rate": 5.591234087491842e-07, "loss": 0.6492, "step": 13320 }, { "epoch": 0.85, "grad_norm": 1.6669205864044316, "learning_rate": 5.586472019821132e-07, "loss": 0.7104, "step": 13321 }, { "epoch": 0.85, "grad_norm": 1.8472319199896772, "learning_rate": 5.581711860950551e-07, "loss": 0.7289, "step": 13322 }, { "epoch": 0.85, "grad_norm": 1.5889117540042825, "learning_rate": 5.576953611084635e-07, "loss": 0.6997, "step": 13323 }, { "epoch": 0.85, "grad_norm": 1.5089717358040364, "learning_rate": 5.57219727042792e-07, "loss": 0.6638, "step": 13324 }, { "epoch": 0.85, "grad_norm": 1.5475629990994422, "learning_rate": 5.567442839184805e-07, "loss": 0.6918, "step": 13325 }, { "epoch": 0.85, "grad_norm": 1.336094811381551, "learning_rate": 5.562690317559639e-07, "loss": 0.6166, "step": 13326 }, { "epoch": 0.85, "grad_norm": 1.3460794927494837, "learning_rate": 5.557939705756665e-07, "loss": 0.6475, "step": 13327 }, { "epoch": 0.85, "grad_norm": 1.5786888412373756, "learning_rate": 5.553191003980052e-07, "loss": 0.6411, "step": 13328 }, { "epoch": 0.85, "grad_norm": 1.4110498950355552, "learning_rate": 5.548444212433901e-07, "loss": 0.6466, "step": 13329 }, { "epoch": 0.85, "grad_norm": 1.54235304370574, "learning_rate": 5.543699331322194e-07, "loss": 0.6961, "step": 13330 }, { "epoch": 0.85, "grad_norm": 1.8098782190551863, "learning_rate": 5.538956360848891e-07, "loss": 0.7588, "step": 13331 }, { "epoch": 0.85, "grad_norm": 1.6369530574076934, "learning_rate": 5.534215301217816e-07, "loss": 0.678, "step": 13332 }, { "epoch": 0.85, "grad_norm": 1.4823749617364659, "learning_rate": 5.529476152632735e-07, "loss": 0.6219, "step": 13333 }, { "epoch": 0.85, "grad_norm": 1.5367805574089504, "learning_rate": 5.524738915297317e-07, "loss": 0.6852, "step": 13334 }, { "epoch": 0.85, "grad_norm": 1.6387580318055388, "learning_rate": 5.520003589415168e-07, "loss": 0.6235, "step": 13335 }, { "epoch": 0.85, "grad_norm": 1.9671120150855281, "learning_rate": 5.515270175189802e-07, "loss": 0.5798, "step": 13336 }, { "epoch": 0.85, "grad_norm": 1.664530182778098, "learning_rate": 5.510538672824645e-07, "loss": 0.6287, "step": 13337 }, { "epoch": 0.85, "grad_norm": 1.5796560892532396, "learning_rate": 5.50580908252304e-07, "loss": 0.6684, "step": 13338 }, { "epoch": 0.85, "grad_norm": 1.8957285306213492, "learning_rate": 5.501081404488279e-07, "loss": 0.7469, "step": 13339 }, { "epoch": 0.85, "grad_norm": 1.4143444917393366, "learning_rate": 5.496355638923534e-07, "loss": 0.6645, "step": 13340 }, { "epoch": 0.85, "grad_norm": 2.0411546741500484, "learning_rate": 5.491631786031904e-07, "loss": 0.5899, "step": 13341 }, { "epoch": 0.85, "grad_norm": 2.3310164754432074, "learning_rate": 5.486909846016419e-07, "loss": 0.6056, "step": 13342 }, { "epoch": 0.85, "grad_norm": 1.7475336711779212, "learning_rate": 5.482189819080014e-07, "loss": 0.5627, "step": 13343 }, { "epoch": 0.85, "grad_norm": 1.4093477988115903, "learning_rate": 5.477471705425541e-07, "loss": 0.6421, "step": 13344 }, { "epoch": 0.85, "grad_norm": 1.7332669830663623, "learning_rate": 5.472755505255783e-07, "loss": 0.6098, "step": 13345 }, { "epoch": 0.85, "grad_norm": 1.3861835196531596, "learning_rate": 5.468041218773429e-07, "loss": 0.6724, "step": 13346 }, { "epoch": 0.85, "grad_norm": 1.6479755198507688, "learning_rate": 5.463328846181081e-07, "loss": 0.6452, "step": 13347 }, { "epoch": 0.85, "grad_norm": 1.5357505311158035, "learning_rate": 5.45861838768128e-07, "loss": 0.6222, "step": 13348 }, { "epoch": 0.85, "grad_norm": 2.171927575136414, "learning_rate": 5.453909843476468e-07, "loss": 0.6531, "step": 13349 }, { "epoch": 0.85, "grad_norm": 1.4556153808931849, "learning_rate": 5.449203213769011e-07, "loss": 0.5663, "step": 13350 }, { "epoch": 0.85, "grad_norm": 1.6064506871094715, "learning_rate": 5.444498498761181e-07, "loss": 0.6823, "step": 13351 }, { "epoch": 0.85, "grad_norm": 1.3869004401295641, "learning_rate": 5.439795698655182e-07, "loss": 0.637, "step": 13352 }, { "epoch": 0.85, "grad_norm": 1.4024846780716054, "learning_rate": 5.43509481365313e-07, "loss": 0.7441, "step": 13353 }, { "epoch": 0.85, "grad_norm": 1.7410942124111053, "learning_rate": 5.430395843957054e-07, "loss": 0.6583, "step": 13354 }, { "epoch": 0.85, "grad_norm": 1.7644383856052703, "learning_rate": 5.425698789768924e-07, "loss": 0.7388, "step": 13355 }, { "epoch": 0.85, "grad_norm": 1.7429930497274486, "learning_rate": 5.42100365129058e-07, "loss": 0.5942, "step": 13356 }, { "epoch": 0.85, "grad_norm": 1.6373417107001322, "learning_rate": 5.416310428723837e-07, "loss": 0.6893, "step": 13357 }, { "epoch": 0.85, "grad_norm": 1.5813137982477985, "learning_rate": 5.411619122270384e-07, "loss": 0.6241, "step": 13358 }, { "epoch": 0.86, "grad_norm": 1.6688339550272773, "learning_rate": 5.40692973213185e-07, "loss": 0.6709, "step": 13359 }, { "epoch": 0.86, "grad_norm": 1.6928865821711236, "learning_rate": 5.402242258509777e-07, "loss": 0.6325, "step": 13360 }, { "epoch": 0.86, "grad_norm": 1.4929953172574044, "learning_rate": 5.397556701605605e-07, "loss": 0.6686, "step": 13361 }, { "epoch": 0.86, "grad_norm": 1.573357827588173, "learning_rate": 5.39287306162074e-07, "loss": 0.6001, "step": 13362 }, { "epoch": 0.86, "grad_norm": 1.600543334528774, "learning_rate": 5.388191338756443e-07, "loss": 0.61, "step": 13363 }, { "epoch": 0.86, "grad_norm": 1.7175342794915371, "learning_rate": 5.383511533213959e-07, "loss": 0.6054, "step": 13364 }, { "epoch": 0.86, "grad_norm": 1.5099556236426825, "learning_rate": 5.378833645194375e-07, "loss": 0.6377, "step": 13365 }, { "epoch": 0.86, "grad_norm": 1.444119805868164, "learning_rate": 5.374157674898772e-07, "loss": 0.6581, "step": 13366 }, { "epoch": 0.86, "grad_norm": 1.903239136517278, "learning_rate": 5.369483622528104e-07, "loss": 0.6811, "step": 13367 }, { "epoch": 0.86, "grad_norm": 1.5053258739965891, "learning_rate": 5.364811488283233e-07, "loss": 0.6676, "step": 13368 }, { "epoch": 0.86, "grad_norm": 1.59566092428634, "learning_rate": 5.360141272364994e-07, "loss": 0.7126, "step": 13369 }, { "epoch": 0.86, "grad_norm": 1.6398365488815705, "learning_rate": 5.355472974974057e-07, "loss": 0.7014, "step": 13370 }, { "epoch": 0.86, "grad_norm": 1.220887647273128, "learning_rate": 5.350806596311109e-07, "loss": 0.6548, "step": 13371 }, { "epoch": 0.86, "grad_norm": 1.5433089770502446, "learning_rate": 5.346142136576649e-07, "loss": 0.576, "step": 13372 }, { "epoch": 0.86, "grad_norm": 1.547640301945586, "learning_rate": 5.341479595971177e-07, "loss": 0.6916, "step": 13373 }, { "epoch": 0.86, "grad_norm": 1.4089162976805816, "learning_rate": 5.336818974695073e-07, "loss": 0.6225, "step": 13374 }, { "epoch": 0.86, "grad_norm": 1.8030205826078234, "learning_rate": 5.332160272948628e-07, "loss": 0.7591, "step": 13375 }, { "epoch": 0.86, "grad_norm": 1.5770296690484302, "learning_rate": 5.327503490932095e-07, "loss": 0.7385, "step": 13376 }, { "epoch": 0.86, "grad_norm": 1.6570443532519887, "learning_rate": 5.322848628845578e-07, "loss": 0.6335, "step": 13377 }, { "epoch": 0.86, "grad_norm": 1.4129622676847724, "learning_rate": 5.31819568688916e-07, "loss": 0.5379, "step": 13378 }, { "epoch": 0.86, "grad_norm": 1.6957777523391597, "learning_rate": 5.313544665262782e-07, "loss": 0.7633, "step": 13379 }, { "epoch": 0.86, "grad_norm": 1.52113243071591, "learning_rate": 5.308895564166372e-07, "loss": 0.7036, "step": 13380 }, { "epoch": 0.86, "grad_norm": 1.7012373012815702, "learning_rate": 5.304248383799715e-07, "loss": 0.6623, "step": 13381 }, { "epoch": 0.86, "grad_norm": 1.9300061801148685, "learning_rate": 5.299603124362546e-07, "loss": 0.6341, "step": 13382 }, { "epoch": 0.86, "grad_norm": 1.6666501389673347, "learning_rate": 5.294959786054505e-07, "loss": 0.7342, "step": 13383 }, { "epoch": 0.86, "grad_norm": 1.2722299233389172, "learning_rate": 5.290318369075153e-07, "loss": 0.7776, "step": 13384 }, { "epoch": 0.86, "grad_norm": 1.5908970161163765, "learning_rate": 5.285678873623973e-07, "loss": 0.7848, "step": 13385 }, { "epoch": 0.86, "grad_norm": 1.6521558825569638, "learning_rate": 5.281041299900347e-07, "loss": 0.7206, "step": 13386 }, { "epoch": 0.86, "grad_norm": 1.1655358961413673, "learning_rate": 5.276405648103616e-07, "loss": 0.6499, "step": 13387 }, { "epoch": 0.86, "grad_norm": 1.4746027678170945, "learning_rate": 5.271771918432977e-07, "loss": 0.6657, "step": 13388 }, { "epoch": 0.86, "grad_norm": 1.5687060516159999, "learning_rate": 5.267140111087604e-07, "loss": 0.5868, "step": 13389 }, { "epoch": 0.86, "grad_norm": 2.0049393952725474, "learning_rate": 5.262510226266548e-07, "loss": 0.6213, "step": 13390 }, { "epoch": 0.86, "grad_norm": 1.4229016866718482, "learning_rate": 5.257882264168795e-07, "loss": 0.6174, "step": 13391 }, { "epoch": 0.86, "grad_norm": 1.907769084344628, "learning_rate": 5.253256224993252e-07, "loss": 0.7187, "step": 13392 }, { "epoch": 0.86, "grad_norm": 0.9966025539391403, "learning_rate": 5.248632108938728e-07, "loss": 0.5632, "step": 13393 }, { "epoch": 0.86, "grad_norm": 1.5856689628876366, "learning_rate": 5.244009916203957e-07, "loss": 0.6295, "step": 13394 }, { "epoch": 0.86, "grad_norm": 1.6564160410485138, "learning_rate": 5.239389646987592e-07, "loss": 0.6402, "step": 13395 }, { "epoch": 0.86, "grad_norm": 1.670391906014392, "learning_rate": 5.234771301488206e-07, "loss": 0.6922, "step": 13396 }, { "epoch": 0.86, "grad_norm": 1.6249294072244918, "learning_rate": 5.230154879904292e-07, "loss": 0.6558, "step": 13397 }, { "epoch": 0.86, "grad_norm": 1.5058593815350123, "learning_rate": 5.22554038243424e-07, "loss": 0.6609, "step": 13398 }, { "epoch": 0.86, "grad_norm": 2.274507983515752, "learning_rate": 5.220927809276383e-07, "loss": 0.7338, "step": 13399 }, { "epoch": 0.86, "grad_norm": 1.5534583724653785, "learning_rate": 5.216317160628959e-07, "loss": 0.6471, "step": 13400 }, { "epoch": 0.86, "grad_norm": 1.6152964813462058, "learning_rate": 5.211708436690111e-07, "loss": 0.7394, "step": 13401 }, { "epoch": 0.86, "grad_norm": 1.4167665152916573, "learning_rate": 5.207101637657929e-07, "loss": 0.6492, "step": 13402 }, { "epoch": 0.86, "grad_norm": 1.6312105105624455, "learning_rate": 5.20249676373038e-07, "loss": 0.6943, "step": 13403 }, { "epoch": 0.86, "grad_norm": 1.7328157658312175, "learning_rate": 5.197893815105398e-07, "loss": 0.739, "step": 13404 }, { "epoch": 0.86, "grad_norm": 1.5569435546948638, "learning_rate": 5.193292791980803e-07, "loss": 0.6301, "step": 13405 }, { "epoch": 0.86, "grad_norm": 1.1464080016163432, "learning_rate": 5.188693694554325e-07, "loss": 0.6311, "step": 13406 }, { "epoch": 0.86, "grad_norm": 1.466184462938427, "learning_rate": 5.184096523023635e-07, "loss": 0.5977, "step": 13407 }, { "epoch": 0.86, "grad_norm": 1.4203363000389877, "learning_rate": 5.179501277586308e-07, "loss": 0.6843, "step": 13408 }, { "epoch": 0.86, "grad_norm": 1.5609289280104788, "learning_rate": 5.174907958439829e-07, "loss": 0.6911, "step": 13409 }, { "epoch": 0.86, "grad_norm": 1.617095442432338, "learning_rate": 5.170316565781608e-07, "loss": 0.6939, "step": 13410 }, { "epoch": 0.86, "grad_norm": 1.5778959960917271, "learning_rate": 5.165727099808998e-07, "loss": 0.6419, "step": 13411 }, { "epoch": 0.86, "grad_norm": 3.4393209384215497, "learning_rate": 5.161139560719214e-07, "loss": 0.6328, "step": 13412 }, { "epoch": 0.86, "grad_norm": 1.1156836765271596, "learning_rate": 5.156553948709436e-07, "loss": 0.6626, "step": 13413 }, { "epoch": 0.86, "grad_norm": 1.7265927899744153, "learning_rate": 5.151970263976741e-07, "loss": 0.6816, "step": 13414 }, { "epoch": 0.86, "grad_norm": 1.069483147962504, "learning_rate": 5.147388506718127e-07, "loss": 0.6047, "step": 13415 }, { "epoch": 0.86, "grad_norm": 1.5049563091713511, "learning_rate": 5.142808677130506e-07, "loss": 0.6712, "step": 13416 }, { "epoch": 0.86, "grad_norm": 1.8652353355808842, "learning_rate": 5.138230775410713e-07, "loss": 0.7205, "step": 13417 }, { "epoch": 0.86, "grad_norm": 1.4575208988258443, "learning_rate": 5.133654801755489e-07, "loss": 0.6439, "step": 13418 }, { "epoch": 0.86, "grad_norm": 1.740200355815197, "learning_rate": 5.129080756361499e-07, "loss": 0.6771, "step": 13419 }, { "epoch": 0.86, "grad_norm": 1.4543832387548348, "learning_rate": 5.124508639425352e-07, "loss": 0.6836, "step": 13420 }, { "epoch": 0.86, "grad_norm": 1.466841861815051, "learning_rate": 5.119938451143502e-07, "loss": 0.6732, "step": 13421 }, { "epoch": 0.86, "grad_norm": 1.7451271586612853, "learning_rate": 5.115370191712404e-07, "loss": 0.6354, "step": 13422 }, { "epoch": 0.86, "grad_norm": 1.3063281379768699, "learning_rate": 5.110803861328378e-07, "loss": 0.6671, "step": 13423 }, { "epoch": 0.86, "grad_norm": 1.438108392176727, "learning_rate": 5.106239460187679e-07, "loss": 0.7175, "step": 13424 }, { "epoch": 0.86, "grad_norm": 1.8443529527644582, "learning_rate": 5.10167698848647e-07, "loss": 0.6951, "step": 13425 }, { "epoch": 0.86, "grad_norm": 1.4632255988511036, "learning_rate": 5.097116446420835e-07, "loss": 0.6657, "step": 13426 }, { "epoch": 0.86, "grad_norm": 2.1619724019939697, "learning_rate": 5.092557834186795e-07, "loss": 0.6785, "step": 13427 }, { "epoch": 0.86, "grad_norm": 1.679539054463686, "learning_rate": 5.088001151980243e-07, "loss": 0.7719, "step": 13428 }, { "epoch": 0.86, "grad_norm": 1.4868565333636854, "learning_rate": 5.083446399997044e-07, "loss": 0.6007, "step": 13429 }, { "epoch": 0.86, "grad_norm": 1.4605365163963626, "learning_rate": 5.078893578432914e-07, "loss": 0.6135, "step": 13430 }, { "epoch": 0.86, "grad_norm": 1.4672509053973337, "learning_rate": 5.074342687483557e-07, "loss": 0.6641, "step": 13431 }, { "epoch": 0.86, "grad_norm": 1.8676915858519874, "learning_rate": 5.069793727344552e-07, "loss": 0.685, "step": 13432 }, { "epoch": 0.86, "grad_norm": 1.1705822267725061, "learning_rate": 5.06524669821139e-07, "loss": 0.6878, "step": 13433 }, { "epoch": 0.86, "grad_norm": 1.4186573926412749, "learning_rate": 5.060701600279527e-07, "loss": 0.6107, "step": 13434 }, { "epoch": 0.86, "grad_norm": 1.6192589840620368, "learning_rate": 5.056158433744251e-07, "loss": 0.8172, "step": 13435 }, { "epoch": 0.86, "grad_norm": 1.9396976073441057, "learning_rate": 5.051617198800873e-07, "loss": 0.6942, "step": 13436 }, { "epoch": 0.86, "grad_norm": 1.6107291186693578, "learning_rate": 5.047077895644514e-07, "loss": 0.5638, "step": 13437 }, { "epoch": 0.86, "grad_norm": 1.7172149708983497, "learning_rate": 5.042540524470302e-07, "loss": 0.6883, "step": 13438 }, { "epoch": 0.86, "grad_norm": 1.3860608431002839, "learning_rate": 5.038005085473224e-07, "loss": 0.6087, "step": 13439 }, { "epoch": 0.86, "grad_norm": 1.5130019184842136, "learning_rate": 5.033471578848203e-07, "loss": 0.5945, "step": 13440 }, { "epoch": 0.86, "grad_norm": 4.041711693965073, "learning_rate": 5.028940004790106e-07, "loss": 0.6404, "step": 13441 }, { "epoch": 0.86, "grad_norm": 1.4632106051491522, "learning_rate": 5.024410363493648e-07, "loss": 0.6599, "step": 13442 }, { "epoch": 0.86, "grad_norm": 1.6106644537103736, "learning_rate": 5.019882655153547e-07, "loss": 0.619, "step": 13443 }, { "epoch": 0.86, "grad_norm": 1.513540910320033, "learning_rate": 5.015356879964356e-07, "loss": 0.6198, "step": 13444 }, { "epoch": 0.86, "grad_norm": 1.4166881015427284, "learning_rate": 5.010833038120605e-07, "loss": 0.6321, "step": 13445 }, { "epoch": 0.86, "grad_norm": 1.5994397435492922, "learning_rate": 5.006311129816721e-07, "loss": 0.7121, "step": 13446 }, { "epoch": 0.86, "grad_norm": 1.7255758728542654, "learning_rate": 5.001791155247032e-07, "loss": 0.7942, "step": 13447 }, { "epoch": 0.86, "grad_norm": 1.653084564331268, "learning_rate": 4.997273114605811e-07, "loss": 0.7123, "step": 13448 }, { "epoch": 0.86, "grad_norm": 1.4480059845102238, "learning_rate": 4.992757008087218e-07, "loss": 0.6291, "step": 13449 }, { "epoch": 0.86, "grad_norm": 1.7989528841620632, "learning_rate": 4.988242835885371e-07, "loss": 0.7669, "step": 13450 }, { "epoch": 0.86, "grad_norm": 1.5776638632958377, "learning_rate": 4.983730598194247e-07, "loss": 0.6057, "step": 13451 }, { "epoch": 0.86, "grad_norm": 1.6526335364103832, "learning_rate": 4.979220295207804e-07, "loss": 0.6979, "step": 13452 }, { "epoch": 0.86, "grad_norm": 1.0676498566281971, "learning_rate": 4.974711927119863e-07, "loss": 0.6703, "step": 13453 }, { "epoch": 0.86, "grad_norm": 1.5614264488776726, "learning_rate": 4.970205494124198e-07, "loss": 0.6028, "step": 13454 }, { "epoch": 0.86, "grad_norm": 2.209213209811523, "learning_rate": 4.96570099641448e-07, "loss": 0.7092, "step": 13455 }, { "epoch": 0.86, "grad_norm": 1.4581240857285243, "learning_rate": 4.9611984341843e-07, "loss": 0.6137, "step": 13456 }, { "epoch": 0.86, "grad_norm": 1.4939814852091304, "learning_rate": 4.95669780762717e-07, "loss": 0.6967, "step": 13457 }, { "epoch": 0.86, "grad_norm": 1.6511765225758672, "learning_rate": 4.952199116936524e-07, "loss": 0.6199, "step": 13458 }, { "epoch": 0.86, "grad_norm": 0.9727306133978887, "learning_rate": 4.9477023623057e-07, "loss": 0.5415, "step": 13459 }, { "epoch": 0.86, "grad_norm": 1.694264517441553, "learning_rate": 4.943207543927947e-07, "loss": 0.6306, "step": 13460 }, { "epoch": 0.86, "grad_norm": 1.2176679482780695, "learning_rate": 4.938714661996469e-07, "loss": 0.7458, "step": 13461 }, { "epoch": 0.86, "grad_norm": 1.4072960482772048, "learning_rate": 4.93422371670435e-07, "loss": 0.6536, "step": 13462 }, { "epoch": 0.86, "grad_norm": 1.0104080138294336, "learning_rate": 4.929734708244599e-07, "loss": 0.5998, "step": 13463 }, { "epoch": 0.86, "grad_norm": 1.3520516970522556, "learning_rate": 4.925247636810144e-07, "loss": 0.6022, "step": 13464 }, { "epoch": 0.86, "grad_norm": 1.669784797378229, "learning_rate": 4.920762502593829e-07, "loss": 0.7566, "step": 13465 }, { "epoch": 0.86, "grad_norm": 1.7965200574284932, "learning_rate": 4.916279305788419e-07, "loss": 0.7365, "step": 13466 }, { "epoch": 0.86, "grad_norm": 2.098039750412734, "learning_rate": 4.911798046586591e-07, "loss": 0.7202, "step": 13467 }, { "epoch": 0.86, "grad_norm": 1.6205548285662672, "learning_rate": 4.907318725180926e-07, "loss": 0.6024, "step": 13468 }, { "epoch": 0.86, "grad_norm": 1.5310727286133035, "learning_rate": 4.902841341763964e-07, "loss": 0.6356, "step": 13469 }, { "epoch": 0.86, "grad_norm": 1.2687381937257094, "learning_rate": 4.898365896528113e-07, "loss": 0.6306, "step": 13470 }, { "epoch": 0.86, "grad_norm": 1.082548228798898, "learning_rate": 4.893892389665728e-07, "loss": 0.7242, "step": 13471 }, { "epoch": 0.86, "grad_norm": 1.492816615164849, "learning_rate": 4.889420821369067e-07, "loss": 0.6754, "step": 13472 }, { "epoch": 0.86, "grad_norm": 1.563907583743692, "learning_rate": 4.884951191830312e-07, "loss": 0.6404, "step": 13473 }, { "epoch": 0.86, "grad_norm": 1.5787235505772967, "learning_rate": 4.880483501241551e-07, "loss": 0.6439, "step": 13474 }, { "epoch": 0.86, "grad_norm": 1.7351776342820093, "learning_rate": 4.876017749794787e-07, "loss": 0.7955, "step": 13475 }, { "epoch": 0.86, "grad_norm": 1.4764193396494516, "learning_rate": 4.871553937681989e-07, "loss": 0.6479, "step": 13476 }, { "epoch": 0.86, "grad_norm": 1.1932440979913717, "learning_rate": 4.867092065094947e-07, "loss": 0.6498, "step": 13477 }, { "epoch": 0.86, "grad_norm": 1.5459112871528222, "learning_rate": 4.862632132225464e-07, "loss": 0.7208, "step": 13478 }, { "epoch": 0.86, "grad_norm": 1.6866743952480518, "learning_rate": 4.858174139265209e-07, "loss": 0.5749, "step": 13479 }, { "epoch": 0.86, "grad_norm": 1.5963707480508822, "learning_rate": 4.853718086405768e-07, "loss": 0.6926, "step": 13480 }, { "epoch": 0.86, "grad_norm": 1.7456322397214892, "learning_rate": 4.849263973838664e-07, "loss": 0.727, "step": 13481 }, { "epoch": 0.86, "grad_norm": 1.6452229456468874, "learning_rate": 4.844811801755306e-07, "loss": 0.6787, "step": 13482 }, { "epoch": 0.86, "grad_norm": 1.7222893954502592, "learning_rate": 4.840361570347069e-07, "loss": 0.7401, "step": 13483 }, { "epoch": 0.86, "grad_norm": 1.4510372406444179, "learning_rate": 4.835913279805182e-07, "loss": 0.5511, "step": 13484 }, { "epoch": 0.86, "grad_norm": 1.4093348482763663, "learning_rate": 4.831466930320861e-07, "loss": 0.6772, "step": 13485 }, { "epoch": 0.86, "grad_norm": 1.5956275951833911, "learning_rate": 4.827022522085157e-07, "loss": 0.5923, "step": 13486 }, { "epoch": 0.86, "grad_norm": 2.373781980443504, "learning_rate": 4.822580055289117e-07, "loss": 0.5643, "step": 13487 }, { "epoch": 0.86, "grad_norm": 1.6815201426248993, "learning_rate": 4.81813953012365e-07, "loss": 0.772, "step": 13488 }, { "epoch": 0.86, "grad_norm": 1.6097594178871018, "learning_rate": 4.813700946779598e-07, "loss": 0.6414, "step": 13489 }, { "epoch": 0.86, "grad_norm": 1.5237528198049801, "learning_rate": 4.809264305447752e-07, "loss": 0.7333, "step": 13490 }, { "epoch": 0.86, "grad_norm": 1.601511401031058, "learning_rate": 4.804829606318745e-07, "loss": 0.6095, "step": 13491 }, { "epoch": 0.86, "grad_norm": 1.6787328895192624, "learning_rate": 4.800396849583211e-07, "loss": 0.698, "step": 13492 }, { "epoch": 0.86, "grad_norm": 1.683456840345952, "learning_rate": 4.795966035431626e-07, "loss": 0.6429, "step": 13493 }, { "epoch": 0.86, "grad_norm": 2.138885378995515, "learning_rate": 4.791537164054444e-07, "loss": 0.6088, "step": 13494 }, { "epoch": 0.86, "grad_norm": 1.582829937244929, "learning_rate": 4.787110235642001e-07, "loss": 0.6455, "step": 13495 }, { "epoch": 0.86, "grad_norm": 1.524489043559725, "learning_rate": 4.78268525038455e-07, "loss": 0.6277, "step": 13496 }, { "epoch": 0.86, "grad_norm": 1.3902924855003984, "learning_rate": 4.778262208472273e-07, "loss": 0.6446, "step": 13497 }, { "epoch": 0.86, "grad_norm": 1.463231387402933, "learning_rate": 4.773841110095257e-07, "loss": 0.685, "step": 13498 }, { "epoch": 0.86, "grad_norm": 1.5019192913168606, "learning_rate": 4.769421955443531e-07, "loss": 0.6869, "step": 13499 }, { "epoch": 0.86, "grad_norm": 1.4641979366735356, "learning_rate": 4.7650047447069916e-07, "loss": 0.6195, "step": 13500 }, { "epoch": 0.86, "grad_norm": 1.534191198873307, "learning_rate": 4.7605894780755113e-07, "loss": 0.6901, "step": 13501 }, { "epoch": 0.86, "grad_norm": 1.3357887143434581, "learning_rate": 4.7561761557388176e-07, "loss": 0.6391, "step": 13502 }, { "epoch": 0.86, "grad_norm": 1.516162483960538, "learning_rate": 4.751764777886614e-07, "loss": 0.5693, "step": 13503 }, { "epoch": 0.86, "grad_norm": 1.663809929613146, "learning_rate": 4.7473553447084764e-07, "loss": 0.6248, "step": 13504 }, { "epoch": 0.86, "grad_norm": 1.5807610297930779, "learning_rate": 4.742947856393909e-07, "loss": 0.6183, "step": 13505 }, { "epoch": 0.86, "grad_norm": 1.5665825549398098, "learning_rate": 4.738542313132366e-07, "loss": 0.6435, "step": 13506 }, { "epoch": 0.86, "grad_norm": 1.4923469475293456, "learning_rate": 4.734138715113146e-07, "loss": 0.6047, "step": 13507 }, { "epoch": 0.86, "grad_norm": 1.5498680324170988, "learning_rate": 4.7297370625255466e-07, "loss": 0.7111, "step": 13508 }, { "epoch": 0.86, "grad_norm": 1.6490982956863667, "learning_rate": 4.7253373555587014e-07, "loss": 0.6999, "step": 13509 }, { "epoch": 0.86, "grad_norm": 1.7145773350708293, "learning_rate": 4.7209395944017354e-07, "loss": 0.6672, "step": 13510 }, { "epoch": 0.86, "grad_norm": 1.4713910999721986, "learning_rate": 4.7165437792436373e-07, "loss": 0.5865, "step": 13511 }, { "epoch": 0.86, "grad_norm": 1.46909219657156, "learning_rate": 4.712149910273334e-07, "loss": 0.5632, "step": 13512 }, { "epoch": 0.86, "grad_norm": 1.506717814491946, "learning_rate": 4.7077579876796675e-07, "loss": 0.7044, "step": 13513 }, { "epoch": 0.86, "grad_norm": 1.649763370126165, "learning_rate": 4.7033680116513824e-07, "loss": 0.6665, "step": 13514 }, { "epoch": 0.87, "grad_norm": 1.4407665865640984, "learning_rate": 4.698979982377172e-07, "loss": 0.6894, "step": 13515 }, { "epoch": 0.87, "grad_norm": 1.7036482930716337, "learning_rate": 4.6945939000455966e-07, "loss": 0.6913, "step": 13516 }, { "epoch": 0.87, "grad_norm": 1.4962656355696071, "learning_rate": 4.690209764845183e-07, "loss": 0.6821, "step": 13517 }, { "epoch": 0.87, "grad_norm": 1.7058018256783714, "learning_rate": 4.685827576964347e-07, "loss": 0.7487, "step": 13518 }, { "epoch": 0.87, "grad_norm": 1.3366579073395255, "learning_rate": 4.6814473365914217e-07, "loss": 0.6601, "step": 13519 }, { "epoch": 0.87, "grad_norm": 1.6073437585743215, "learning_rate": 4.6770690439146617e-07, "loss": 0.6936, "step": 13520 }, { "epoch": 0.87, "grad_norm": 1.4152263918036094, "learning_rate": 4.6726926991222386e-07, "loss": 0.6413, "step": 13521 }, { "epoch": 0.87, "grad_norm": 1.5401403697274114, "learning_rate": 4.668318302402236e-07, "loss": 0.6577, "step": 13522 }, { "epoch": 0.87, "grad_norm": 1.4805741745242074, "learning_rate": 4.663945853942653e-07, "loss": 0.6143, "step": 13523 }, { "epoch": 0.87, "grad_norm": 1.6259838132655053, "learning_rate": 4.659575353931417e-07, "loss": 0.6566, "step": 13524 }, { "epoch": 0.87, "grad_norm": 1.6574207201397473, "learning_rate": 4.655206802556361e-07, "loss": 0.6463, "step": 13525 }, { "epoch": 0.87, "grad_norm": 1.9330072729232113, "learning_rate": 4.650840200005236e-07, "loss": 0.6468, "step": 13526 }, { "epoch": 0.87, "grad_norm": 1.4553908840647247, "learning_rate": 4.646475546465706e-07, "loss": 0.6195, "step": 13527 }, { "epoch": 0.87, "grad_norm": 1.626900634796925, "learning_rate": 4.642112842125357e-07, "loss": 0.7643, "step": 13528 }, { "epoch": 0.87, "grad_norm": 1.4666337683108037, "learning_rate": 4.6377520871716874e-07, "loss": 0.6375, "step": 13529 }, { "epoch": 0.87, "grad_norm": 1.6147720686339577, "learning_rate": 4.633393281792109e-07, "loss": 0.6005, "step": 13530 }, { "epoch": 0.87, "grad_norm": 1.6156146027687812, "learning_rate": 4.629036426173955e-07, "loss": 0.6581, "step": 13531 }, { "epoch": 0.87, "grad_norm": 1.5532066380530185, "learning_rate": 4.624681520504498e-07, "loss": 0.6478, "step": 13532 }, { "epoch": 0.87, "grad_norm": 1.5765829152832058, "learning_rate": 4.6203285649708605e-07, "loss": 0.6125, "step": 13533 }, { "epoch": 0.87, "grad_norm": 1.370423630801755, "learning_rate": 4.6159775597601543e-07, "loss": 0.6221, "step": 13534 }, { "epoch": 0.87, "grad_norm": 1.1739578074035153, "learning_rate": 4.6116285050593733e-07, "loss": 0.5977, "step": 13535 }, { "epoch": 0.87, "grad_norm": 1.5335053092357989, "learning_rate": 4.607281401055419e-07, "loss": 0.6512, "step": 13536 }, { "epoch": 0.87, "grad_norm": 1.5752566277003386, "learning_rate": 4.6029362479351303e-07, "loss": 0.6559, "step": 13537 }, { "epoch": 0.87, "grad_norm": 1.5214991133756595, "learning_rate": 4.598593045885247e-07, "loss": 0.7137, "step": 13538 }, { "epoch": 0.87, "grad_norm": 1.6505246585821873, "learning_rate": 4.5942517950924305e-07, "loss": 0.6228, "step": 13539 }, { "epoch": 0.87, "grad_norm": 1.6367269340966044, "learning_rate": 4.5899124957432597e-07, "loss": 0.6147, "step": 13540 }, { "epoch": 0.87, "grad_norm": 1.5529025372894945, "learning_rate": 4.5855751480242404e-07, "loss": 0.6608, "step": 13541 }, { "epoch": 0.87, "grad_norm": 1.240461382381005, "learning_rate": 4.5812397521217577e-07, "loss": 0.5763, "step": 13542 }, { "epoch": 0.87, "grad_norm": 1.594168341500912, "learning_rate": 4.5769063082221623e-07, "loss": 0.6194, "step": 13543 }, { "epoch": 0.87, "grad_norm": 1.801519614693253, "learning_rate": 4.572574816511688e-07, "loss": 0.6751, "step": 13544 }, { "epoch": 0.87, "grad_norm": 1.399099806931158, "learning_rate": 4.568245277176486e-07, "loss": 0.6094, "step": 13545 }, { "epoch": 0.87, "grad_norm": 1.5339683426805135, "learning_rate": 4.5639176904026417e-07, "loss": 0.6469, "step": 13546 }, { "epoch": 0.87, "grad_norm": 1.3767258466196453, "learning_rate": 4.559592056376133e-07, "loss": 0.6081, "step": 13547 }, { "epoch": 0.87, "grad_norm": 1.302330392828672, "learning_rate": 4.5552683752828896e-07, "loss": 0.5693, "step": 13548 }, { "epoch": 0.87, "grad_norm": 1.482984360340545, "learning_rate": 4.5509466473087017e-07, "loss": 0.5677, "step": 13549 }, { "epoch": 0.87, "grad_norm": 1.7174672403338624, "learning_rate": 4.546626872639343e-07, "loss": 0.6634, "step": 13550 }, { "epoch": 0.87, "grad_norm": 1.4748842682931684, "learning_rate": 4.5423090514604305e-07, "loss": 0.585, "step": 13551 }, { "epoch": 0.87, "grad_norm": 1.438183776295628, "learning_rate": 4.5379931839575673e-07, "loss": 0.5951, "step": 13552 }, { "epoch": 0.87, "grad_norm": 1.547229474006434, "learning_rate": 4.533679270316227e-07, "loss": 0.6448, "step": 13553 }, { "epoch": 0.87, "grad_norm": 1.466458895041283, "learning_rate": 4.529367310721805e-07, "loss": 0.7037, "step": 13554 }, { "epoch": 0.87, "grad_norm": 1.0340334202774295, "learning_rate": 4.525057305359648e-07, "loss": 0.5733, "step": 13555 }, { "epoch": 0.87, "grad_norm": 1.322654437124988, "learning_rate": 4.5207492544149525e-07, "loss": 0.6567, "step": 13556 }, { "epoch": 0.87, "grad_norm": 1.4657424987930088, "learning_rate": 4.5164431580729087e-07, "loss": 0.6857, "step": 13557 }, { "epoch": 0.87, "grad_norm": 1.6609765516821835, "learning_rate": 4.512139016518541e-07, "loss": 0.6438, "step": 13558 }, { "epoch": 0.87, "grad_norm": 1.5587036928973235, "learning_rate": 4.507836829936868e-07, "loss": 0.6538, "step": 13559 }, { "epoch": 0.87, "grad_norm": 1.6674657601475371, "learning_rate": 4.503536598512775e-07, "loss": 0.5997, "step": 13560 }, { "epoch": 0.87, "grad_norm": 1.599742205149543, "learning_rate": 4.499238322431071e-07, "loss": 0.6538, "step": 13561 }, { "epoch": 0.87, "grad_norm": 1.6152880013006183, "learning_rate": 4.4949420018765065e-07, "loss": 0.7286, "step": 13562 }, { "epoch": 0.87, "grad_norm": 1.6840919328914319, "learning_rate": 4.4906476370337017e-07, "loss": 0.6709, "step": 13563 }, { "epoch": 0.87, "grad_norm": 1.925025023701142, "learning_rate": 4.4863552280872523e-07, "loss": 0.677, "step": 13564 }, { "epoch": 0.87, "grad_norm": 1.9512991418721652, "learning_rate": 4.482064775221595e-07, "loss": 0.6311, "step": 13565 }, { "epoch": 0.87, "grad_norm": 1.6529606815735218, "learning_rate": 4.47777627862116e-07, "loss": 0.6722, "step": 13566 }, { "epoch": 0.87, "grad_norm": 1.1422133749031176, "learning_rate": 4.4734897384702434e-07, "loss": 0.672, "step": 13567 }, { "epoch": 0.87, "grad_norm": 1.4236658274721241, "learning_rate": 4.469205154953077e-07, "loss": 0.7043, "step": 13568 }, { "epoch": 0.87, "grad_norm": 1.6235490649269582, "learning_rate": 4.4649225282537954e-07, "loss": 0.7439, "step": 13569 }, { "epoch": 0.87, "grad_norm": 1.9933177206193358, "learning_rate": 4.4606418585564526e-07, "loss": 0.6124, "step": 13570 }, { "epoch": 0.87, "grad_norm": 1.5431204144557988, "learning_rate": 4.4563631460450506e-07, "loss": 0.6684, "step": 13571 }, { "epoch": 0.87, "grad_norm": 1.5818778682684382, "learning_rate": 4.452086390903443e-07, "loss": 0.6129, "step": 13572 }, { "epoch": 0.87, "grad_norm": 1.677852434281891, "learning_rate": 4.4478115933154597e-07, "loss": 0.5871, "step": 13573 }, { "epoch": 0.87, "grad_norm": 1.3971951821023436, "learning_rate": 4.443538753464821e-07, "loss": 0.6572, "step": 13574 }, { "epoch": 0.87, "grad_norm": 1.4810379909772078, "learning_rate": 4.439267871535152e-07, "loss": 0.7295, "step": 13575 }, { "epoch": 0.87, "grad_norm": 1.7606487379241433, "learning_rate": 4.4349989477100174e-07, "loss": 0.7573, "step": 13576 }, { "epoch": 0.87, "grad_norm": 1.6701222309144188, "learning_rate": 4.430731982172887e-07, "loss": 0.7496, "step": 13577 }, { "epoch": 0.87, "grad_norm": 1.5207694796214442, "learning_rate": 4.4264669751071353e-07, "loss": 0.7415, "step": 13578 }, { "epoch": 0.87, "grad_norm": 1.109272607853449, "learning_rate": 4.4222039266960616e-07, "loss": 0.6448, "step": 13579 }, { "epoch": 0.87, "grad_norm": 1.7160781882524836, "learning_rate": 4.4179428371229125e-07, "loss": 0.6807, "step": 13580 }, { "epoch": 0.87, "grad_norm": 1.7588648036457701, "learning_rate": 4.413683706570776e-07, "loss": 0.7229, "step": 13581 }, { "epoch": 0.87, "grad_norm": 1.0951880809027608, "learning_rate": 4.409426535222738e-07, "loss": 0.5959, "step": 13582 }, { "epoch": 0.87, "grad_norm": 1.5695479659740568, "learning_rate": 4.4051713232617423e-07, "loss": 0.6727, "step": 13583 }, { "epoch": 0.87, "grad_norm": 1.6480825073509877, "learning_rate": 4.40091807087068e-07, "loss": 0.7015, "step": 13584 }, { "epoch": 0.87, "grad_norm": 1.2868379373205359, "learning_rate": 4.396666778232345e-07, "loss": 0.6587, "step": 13585 }, { "epoch": 0.87, "grad_norm": 1.5146805272072068, "learning_rate": 4.39241744552944e-07, "loss": 0.65, "step": 13586 }, { "epoch": 0.87, "grad_norm": 1.5471526505235604, "learning_rate": 4.388170072944603e-07, "loss": 0.5823, "step": 13587 }, { "epoch": 0.87, "grad_norm": 1.5736235791569582, "learning_rate": 4.38392466066036e-07, "loss": 0.7229, "step": 13588 }, { "epoch": 0.87, "grad_norm": 1.5693495103333186, "learning_rate": 4.3796812088591934e-07, "loss": 0.6668, "step": 13589 }, { "epoch": 0.87, "grad_norm": 1.3664568047970576, "learning_rate": 4.375439717723462e-07, "loss": 0.7286, "step": 13590 }, { "epoch": 0.87, "grad_norm": 1.5557894744786305, "learning_rate": 4.37120018743547e-07, "loss": 0.6933, "step": 13591 }, { "epoch": 0.87, "grad_norm": 1.0992663051962726, "learning_rate": 4.366962618177406e-07, "loss": 0.7214, "step": 13592 }, { "epoch": 0.87, "grad_norm": 1.694848428923769, "learning_rate": 4.362727010131407e-07, "loss": 0.6476, "step": 13593 }, { "epoch": 0.87, "grad_norm": 1.6553795008127792, "learning_rate": 4.358493363479499e-07, "loss": 0.6483, "step": 13594 }, { "epoch": 0.87, "grad_norm": 1.1813505976087453, "learning_rate": 4.354261678403643e-07, "loss": 0.5495, "step": 13595 }, { "epoch": 0.87, "grad_norm": 1.985041387976293, "learning_rate": 4.350031955085698e-07, "loss": 0.7778, "step": 13596 }, { "epoch": 0.87, "grad_norm": 1.0510824089214275, "learning_rate": 4.345804193707459e-07, "loss": 0.6887, "step": 13597 }, { "epoch": 0.87, "grad_norm": 1.7516491172431161, "learning_rate": 4.3415783944506294e-07, "loss": 0.6583, "step": 13598 }, { "epoch": 0.87, "grad_norm": 1.0489001821445756, "learning_rate": 4.3373545574968143e-07, "loss": 0.5759, "step": 13599 }, { "epoch": 0.87, "grad_norm": 1.6155717954058904, "learning_rate": 4.333132683027552e-07, "loss": 0.697, "step": 13600 }, { "epoch": 0.87, "grad_norm": 1.5255970789531033, "learning_rate": 4.3289127712242864e-07, "loss": 0.7135, "step": 13601 }, { "epoch": 0.87, "grad_norm": 1.8996300606814094, "learning_rate": 4.3246948222683826e-07, "loss": 0.6827, "step": 13602 }, { "epoch": 0.87, "grad_norm": 1.7164937337801653, "learning_rate": 4.320478836341113e-07, "loss": 0.6631, "step": 13603 }, { "epoch": 0.87, "grad_norm": 1.6344817362343078, "learning_rate": 4.3162648136236885e-07, "loss": 0.6992, "step": 13604 }, { "epoch": 0.87, "grad_norm": 1.6271184388857138, "learning_rate": 4.312052754297197e-07, "loss": 0.7859, "step": 13605 }, { "epoch": 0.87, "grad_norm": 1.1767208643368365, "learning_rate": 4.3078426585426826e-07, "loss": 0.5937, "step": 13606 }, { "epoch": 0.87, "grad_norm": 1.7962025412276412, "learning_rate": 4.303634526541067e-07, "loss": 0.6836, "step": 13607 }, { "epoch": 0.87, "grad_norm": 2.1201121470021853, "learning_rate": 4.2994283584732286e-07, "loss": 0.6388, "step": 13608 }, { "epoch": 0.87, "grad_norm": 1.7512490500401983, "learning_rate": 4.2952241545199226e-07, "loss": 0.6652, "step": 13609 }, { "epoch": 0.87, "grad_norm": 1.549189145954367, "learning_rate": 4.291021914861848e-07, "loss": 0.6601, "step": 13610 }, { "epoch": 0.87, "grad_norm": 1.734074515880489, "learning_rate": 4.2868216396796057e-07, "loss": 0.6977, "step": 13611 }, { "epoch": 0.87, "grad_norm": 1.0426524561601636, "learning_rate": 4.282623329153701e-07, "loss": 0.5952, "step": 13612 }, { "epoch": 0.87, "grad_norm": 1.8138284986371436, "learning_rate": 4.2784269834645955e-07, "loss": 0.7456, "step": 13613 }, { "epoch": 0.87, "grad_norm": 0.9915334837348974, "learning_rate": 4.2742326027926106e-07, "loss": 0.6907, "step": 13614 }, { "epoch": 0.87, "grad_norm": 1.5889631677576717, "learning_rate": 4.270040187318031e-07, "loss": 0.7228, "step": 13615 }, { "epoch": 0.87, "grad_norm": 0.959991576893368, "learning_rate": 4.265849737221034e-07, "loss": 0.6836, "step": 13616 }, { "epoch": 0.87, "grad_norm": 1.421417120494936, "learning_rate": 4.2616612526817146e-07, "loss": 0.634, "step": 13617 }, { "epoch": 0.87, "grad_norm": 1.4009183564563001, "learning_rate": 4.257474733880085e-07, "loss": 0.6316, "step": 13618 }, { "epoch": 0.87, "grad_norm": 1.8782282832621449, "learning_rate": 4.253290180996067e-07, "loss": 0.714, "step": 13619 }, { "epoch": 0.87, "grad_norm": 2.5625425331686893, "learning_rate": 4.249107594209523e-07, "loss": 0.6911, "step": 13620 }, { "epoch": 0.87, "grad_norm": 1.5973130518499186, "learning_rate": 4.244926973700181e-07, "loss": 0.5878, "step": 13621 }, { "epoch": 0.87, "grad_norm": 1.6046144790918782, "learning_rate": 4.240748319647753e-07, "loss": 0.6222, "step": 13622 }, { "epoch": 0.87, "grad_norm": 1.791154877718218, "learning_rate": 4.2365716322317894e-07, "loss": 0.7425, "step": 13623 }, { "epoch": 0.87, "grad_norm": 2.2917431071724366, "learning_rate": 4.2323969116318255e-07, "loss": 0.7044, "step": 13624 }, { "epoch": 0.87, "grad_norm": 1.552409132638742, "learning_rate": 4.228224158027272e-07, "loss": 0.6021, "step": 13625 }, { "epoch": 0.87, "grad_norm": 1.6330588037474627, "learning_rate": 4.224053371597453e-07, "loss": 0.64, "step": 13626 }, { "epoch": 0.87, "grad_norm": 1.4746023375123247, "learning_rate": 4.2198845525216524e-07, "loss": 0.6347, "step": 13627 }, { "epoch": 0.87, "grad_norm": 1.4001736181701792, "learning_rate": 4.215717700978994e-07, "loss": 0.6124, "step": 13628 }, { "epoch": 0.87, "grad_norm": 1.5040815443930111, "learning_rate": 4.2115528171486063e-07, "loss": 0.7136, "step": 13629 }, { "epoch": 0.87, "grad_norm": 1.564878933986446, "learning_rate": 4.207389901209441e-07, "loss": 0.5933, "step": 13630 }, { "epoch": 0.87, "grad_norm": 1.6237388600942895, "learning_rate": 4.2032289533404434e-07, "loss": 0.6583, "step": 13631 }, { "epoch": 0.87, "grad_norm": 1.588929413918087, "learning_rate": 4.199069973720438e-07, "loss": 0.6284, "step": 13632 }, { "epoch": 0.87, "grad_norm": 1.729004073417129, "learning_rate": 4.194912962528158e-07, "loss": 0.6848, "step": 13633 }, { "epoch": 0.87, "grad_norm": 1.509634752929009, "learning_rate": 4.190757919942273e-07, "loss": 0.6202, "step": 13634 }, { "epoch": 0.87, "grad_norm": 1.7166278339782488, "learning_rate": 4.1866048461413454e-07, "loss": 0.6785, "step": 13635 }, { "epoch": 0.87, "grad_norm": 1.5155409761759329, "learning_rate": 4.1824537413038937e-07, "loss": 0.7599, "step": 13636 }, { "epoch": 0.87, "grad_norm": 1.0916172771672739, "learning_rate": 4.17830460560828e-07, "loss": 0.6631, "step": 13637 }, { "epoch": 0.87, "grad_norm": 1.5191643784804363, "learning_rate": 4.174157439232862e-07, "loss": 0.6642, "step": 13638 }, { "epoch": 0.87, "grad_norm": 1.222034401184431, "learning_rate": 4.170012242355864e-07, "loss": 0.7621, "step": 13639 }, { "epoch": 0.87, "grad_norm": 1.1733080686353434, "learning_rate": 4.1658690151554383e-07, "loss": 0.5894, "step": 13640 }, { "epoch": 0.87, "grad_norm": 1.6210380674859208, "learning_rate": 4.161727757809653e-07, "loss": 0.7286, "step": 13641 }, { "epoch": 0.87, "grad_norm": 1.2190165568690257, "learning_rate": 4.1575884704964874e-07, "loss": 0.6917, "step": 13642 }, { "epoch": 0.87, "grad_norm": 2.070471296330956, "learning_rate": 4.153451153393839e-07, "loss": 0.6881, "step": 13643 }, { "epoch": 0.87, "grad_norm": 1.5917612983089655, "learning_rate": 4.149315806679516e-07, "loss": 0.6274, "step": 13644 }, { "epoch": 0.87, "grad_norm": 1.7453113988589621, "learning_rate": 4.145182430531264e-07, "loss": 0.702, "step": 13645 }, { "epoch": 0.87, "grad_norm": 1.014169429555203, "learning_rate": 4.141051025126719e-07, "loss": 0.7061, "step": 13646 }, { "epoch": 0.87, "grad_norm": 1.5187177711407167, "learning_rate": 4.1369215906434337e-07, "loss": 0.6369, "step": 13647 }, { "epoch": 0.87, "grad_norm": 1.4828988322629193, "learning_rate": 4.132794127258888e-07, "loss": 0.5667, "step": 13648 }, { "epoch": 0.87, "grad_norm": 1.562890178463212, "learning_rate": 4.1286686351504735e-07, "loss": 0.5242, "step": 13649 }, { "epoch": 0.87, "grad_norm": 1.9859347529037332, "learning_rate": 4.1245451144954873e-07, "loss": 0.8063, "step": 13650 }, { "epoch": 0.87, "grad_norm": 1.4809617678768583, "learning_rate": 4.120423565471154e-07, "loss": 0.7225, "step": 13651 }, { "epoch": 0.87, "grad_norm": 1.395846234337381, "learning_rate": 4.116303988254616e-07, "loss": 0.6817, "step": 13652 }, { "epoch": 0.87, "grad_norm": 1.6413049618094107, "learning_rate": 4.1121863830229037e-07, "loss": 0.6298, "step": 13653 }, { "epoch": 0.87, "grad_norm": 1.6587633475648595, "learning_rate": 4.108070749953008e-07, "loss": 0.6498, "step": 13654 }, { "epoch": 0.87, "grad_norm": 1.4002923982079385, "learning_rate": 4.1039570892217993e-07, "loss": 0.6451, "step": 13655 }, { "epoch": 0.87, "grad_norm": 1.699572232636424, "learning_rate": 4.0998454010060695e-07, "loss": 0.7029, "step": 13656 }, { "epoch": 0.87, "grad_norm": 1.5292227894061052, "learning_rate": 4.0957356854825436e-07, "loss": 0.6967, "step": 13657 }, { "epoch": 0.87, "grad_norm": 1.7160534502353195, "learning_rate": 4.09162794282783e-07, "loss": 0.6682, "step": 13658 }, { "epoch": 0.87, "grad_norm": 2.0001705750978798, "learning_rate": 4.087522173218489e-07, "loss": 0.6914, "step": 13659 }, { "epoch": 0.87, "grad_norm": 1.5444302179619473, "learning_rate": 4.0834183768309666e-07, "loss": 0.6015, "step": 13660 }, { "epoch": 0.87, "grad_norm": 1.1672350662780997, "learning_rate": 4.079316553841628e-07, "loss": 0.7098, "step": 13661 }, { "epoch": 0.87, "grad_norm": 1.6844134431033329, "learning_rate": 4.075216704426782e-07, "loss": 0.6786, "step": 13662 }, { "epoch": 0.87, "grad_norm": 1.7969629062355827, "learning_rate": 4.0711188287626156e-07, "loss": 0.6216, "step": 13663 }, { "epoch": 0.87, "grad_norm": 1.4855883326429744, "learning_rate": 4.06702292702526e-07, "loss": 0.5883, "step": 13664 }, { "epoch": 0.87, "grad_norm": 1.8396144393857317, "learning_rate": 4.062928999390736e-07, "loss": 0.5687, "step": 13665 }, { "epoch": 0.87, "grad_norm": 2.697963272417237, "learning_rate": 4.0588370460349967e-07, "loss": 0.7105, "step": 13666 }, { "epoch": 0.87, "grad_norm": 1.1759942477591174, "learning_rate": 4.0547470671339075e-07, "loss": 0.6558, "step": 13667 }, { "epoch": 0.87, "grad_norm": 2.327811341773405, "learning_rate": 4.050659062863233e-07, "loss": 0.7066, "step": 13668 }, { "epoch": 0.87, "grad_norm": 1.675395844245185, "learning_rate": 4.0465730333986983e-07, "loss": 0.6903, "step": 13669 }, { "epoch": 0.87, "grad_norm": 1.5322701954536808, "learning_rate": 4.0424889789158763e-07, "loss": 0.5758, "step": 13670 }, { "epoch": 0.88, "grad_norm": 2.471693836644521, "learning_rate": 4.0384068995903136e-07, "loss": 0.6984, "step": 13671 }, { "epoch": 0.88, "grad_norm": 1.4375589798072004, "learning_rate": 4.0343267955974487e-07, "loss": 0.6232, "step": 13672 }, { "epoch": 0.88, "grad_norm": 1.7407509999692865, "learning_rate": 4.03024866711263e-07, "loss": 0.6714, "step": 13673 }, { "epoch": 0.88, "grad_norm": 1.5507810159069508, "learning_rate": 4.026172514311122e-07, "loss": 0.6496, "step": 13674 }, { "epoch": 0.88, "grad_norm": 1.6204281399829086, "learning_rate": 4.0220983373681133e-07, "loss": 0.6598, "step": 13675 }, { "epoch": 0.88, "grad_norm": 1.704399990928192, "learning_rate": 4.018026136458719e-07, "loss": 0.6784, "step": 13676 }, { "epoch": 0.88, "grad_norm": 1.1042501111830971, "learning_rate": 4.013955911757922e-07, "loss": 0.7251, "step": 13677 }, { "epoch": 0.88, "grad_norm": 1.499969196976168, "learning_rate": 4.0098876634406925e-07, "loss": 0.674, "step": 13678 }, { "epoch": 0.88, "grad_norm": 1.8014166623514236, "learning_rate": 4.00582139168183e-07, "loss": 0.7186, "step": 13679 }, { "epoch": 0.88, "grad_norm": 1.1608615805476135, "learning_rate": 4.001757096656128e-07, "loss": 0.7742, "step": 13680 }, { "epoch": 0.88, "grad_norm": 1.6412598340884939, "learning_rate": 3.997694778538247e-07, "loss": 0.5928, "step": 13681 }, { "epoch": 0.88, "grad_norm": 1.5461468414147974, "learning_rate": 3.993634437502775e-07, "loss": 0.739, "step": 13682 }, { "epoch": 0.88, "grad_norm": 1.8210587674807555, "learning_rate": 3.9895760737242384e-07, "loss": 0.6358, "step": 13683 }, { "epoch": 0.88, "grad_norm": 1.3961237796362078, "learning_rate": 3.985519687377026e-07, "loss": 0.6801, "step": 13684 }, { "epoch": 0.88, "grad_norm": 2.0739492437304294, "learning_rate": 3.9814652786355046e-07, "loss": 0.6927, "step": 13685 }, { "epoch": 0.88, "grad_norm": 1.5449569139417263, "learning_rate": 3.9774128476738836e-07, "loss": 0.6549, "step": 13686 }, { "epoch": 0.88, "grad_norm": 1.712272553933158, "learning_rate": 3.973362394666369e-07, "loss": 0.6323, "step": 13687 }, { "epoch": 0.88, "grad_norm": 1.4319469997001801, "learning_rate": 3.969313919787016e-07, "loss": 0.6633, "step": 13688 }, { "epoch": 0.88, "grad_norm": 1.6386356387870282, "learning_rate": 3.965267423209834e-07, "loss": 0.6835, "step": 13689 }, { "epoch": 0.88, "grad_norm": 1.5000581901729584, "learning_rate": 3.961222905108719e-07, "loss": 0.5699, "step": 13690 }, { "epoch": 0.88, "grad_norm": 1.382773677327752, "learning_rate": 3.957180365657498e-07, "loss": 0.5982, "step": 13691 }, { "epoch": 0.88, "grad_norm": 0.9853315390818375, "learning_rate": 3.953139805029932e-07, "loss": 0.6471, "step": 13692 }, { "epoch": 0.88, "grad_norm": 1.4308867607255324, "learning_rate": 3.949101223399643e-07, "loss": 0.684, "step": 13693 }, { "epoch": 0.88, "grad_norm": 1.5979943111071, "learning_rate": 3.945064620940231e-07, "loss": 0.719, "step": 13694 }, { "epoch": 0.88, "grad_norm": 1.4205361228439015, "learning_rate": 3.9410299978251523e-07, "loss": 0.6501, "step": 13695 }, { "epoch": 0.88, "grad_norm": 1.5241084959066407, "learning_rate": 3.936997354227834e-07, "loss": 0.6233, "step": 13696 }, { "epoch": 0.88, "grad_norm": 1.3594725666380274, "learning_rate": 3.932966690321571e-07, "loss": 0.5533, "step": 13697 }, { "epoch": 0.88, "grad_norm": 1.799794799508848, "learning_rate": 3.9289380062796036e-07, "loss": 0.7093, "step": 13698 }, { "epoch": 0.88, "grad_norm": 1.4612497240910345, "learning_rate": 3.92491130227507e-07, "loss": 0.6537, "step": 13699 }, { "epoch": 0.88, "grad_norm": 1.7028143561230293, "learning_rate": 3.92088657848102e-07, "loss": 0.6929, "step": 13700 }, { "epoch": 0.88, "grad_norm": 1.7881650660046415, "learning_rate": 3.9168638350704614e-07, "loss": 0.7329, "step": 13701 }, { "epoch": 0.88, "grad_norm": 1.5931544051647988, "learning_rate": 3.9128430722162437e-07, "loss": 0.7316, "step": 13702 }, { "epoch": 0.88, "grad_norm": 1.4103056014077686, "learning_rate": 3.9088242900911957e-07, "loss": 0.655, "step": 13703 }, { "epoch": 0.88, "grad_norm": 1.5770809403857806, "learning_rate": 3.904807488868029e-07, "loss": 0.6048, "step": 13704 }, { "epoch": 0.88, "grad_norm": 1.4300322615282048, "learning_rate": 3.900792668719372e-07, "loss": 0.5664, "step": 13705 }, { "epoch": 0.88, "grad_norm": 1.6467532167085348, "learning_rate": 3.896779829817787e-07, "loss": 0.6214, "step": 13706 }, { "epoch": 0.88, "grad_norm": 1.7133794895227061, "learning_rate": 3.8927689723357253e-07, "loss": 0.6372, "step": 13707 }, { "epoch": 0.88, "grad_norm": 1.6831410480129987, "learning_rate": 3.888760096445565e-07, "loss": 0.6568, "step": 13708 }, { "epoch": 0.88, "grad_norm": 1.7340248186708576, "learning_rate": 3.8847532023195967e-07, "loss": 0.6414, "step": 13709 }, { "epoch": 0.88, "grad_norm": 1.8346993874437436, "learning_rate": 3.880748290130043e-07, "loss": 0.7454, "step": 13710 }, { "epoch": 0.88, "grad_norm": 1.5973650725223616, "learning_rate": 3.876745360049017e-07, "loss": 0.6103, "step": 13711 }, { "epoch": 0.88, "grad_norm": 1.7994596542478123, "learning_rate": 3.872744412248558e-07, "loss": 0.6284, "step": 13712 }, { "epoch": 0.88, "grad_norm": 1.574429222726664, "learning_rate": 3.868745446900618e-07, "loss": 0.7103, "step": 13713 }, { "epoch": 0.88, "grad_norm": 1.7215028900333609, "learning_rate": 3.864748464177065e-07, "loss": 0.6811, "step": 13714 }, { "epoch": 0.88, "grad_norm": 1.4884668901107005, "learning_rate": 3.860753464249678e-07, "loss": 0.6667, "step": 13715 }, { "epoch": 0.88, "grad_norm": 1.4480498432306141, "learning_rate": 3.856760447290153e-07, "loss": 0.6267, "step": 13716 }, { "epoch": 0.88, "grad_norm": 0.9221893957745674, "learning_rate": 3.8527694134700975e-07, "loss": 0.6393, "step": 13717 }, { "epoch": 0.88, "grad_norm": 1.5046056415252784, "learning_rate": 3.848780362961052e-07, "loss": 0.5913, "step": 13718 }, { "epoch": 0.88, "grad_norm": 1.1859641913738368, "learning_rate": 3.844793295934451e-07, "loss": 0.7233, "step": 13719 }, { "epoch": 0.88, "grad_norm": 1.6294158091184618, "learning_rate": 3.8408082125616473e-07, "loss": 0.5813, "step": 13720 }, { "epoch": 0.88, "grad_norm": 1.5269093624317482, "learning_rate": 3.836825113013909e-07, "loss": 0.6684, "step": 13721 }, { "epoch": 0.88, "grad_norm": 1.607478989687753, "learning_rate": 3.832843997462432e-07, "loss": 0.6824, "step": 13722 }, { "epoch": 0.88, "grad_norm": 1.6303121172954431, "learning_rate": 3.8288648660783023e-07, "loss": 0.7092, "step": 13723 }, { "epoch": 0.88, "grad_norm": 1.6081958455814191, "learning_rate": 3.8248877190325383e-07, "loss": 0.6755, "step": 13724 }, { "epoch": 0.88, "grad_norm": 1.6398830360852916, "learning_rate": 3.820912556496087e-07, "loss": 0.7046, "step": 13725 }, { "epoch": 0.88, "grad_norm": 1.382006396600158, "learning_rate": 3.816939378639767e-07, "loss": 0.6384, "step": 13726 }, { "epoch": 0.88, "grad_norm": 1.1674453921922523, "learning_rate": 3.8129681856343526e-07, "loss": 0.5994, "step": 13727 }, { "epoch": 0.88, "grad_norm": 1.6876763797873948, "learning_rate": 3.808998977650513e-07, "loss": 0.6762, "step": 13728 }, { "epoch": 0.88, "grad_norm": 1.4699595627430972, "learning_rate": 3.8050317548588343e-07, "loss": 0.7172, "step": 13729 }, { "epoch": 0.88, "grad_norm": 1.5826810376820712, "learning_rate": 3.801066517429824e-07, "loss": 0.7399, "step": 13730 }, { "epoch": 0.88, "grad_norm": 1.4717481547893265, "learning_rate": 3.7971032655339026e-07, "loss": 0.5802, "step": 13731 }, { "epoch": 0.88, "grad_norm": 1.6011062004367052, "learning_rate": 3.7931419993413934e-07, "loss": 0.6555, "step": 13732 }, { "epoch": 0.88, "grad_norm": 1.648779464172597, "learning_rate": 3.789182719022538e-07, "loss": 0.5791, "step": 13733 }, { "epoch": 0.88, "grad_norm": 1.5334207306658127, "learning_rate": 3.7852254247475237e-07, "loss": 0.5989, "step": 13734 }, { "epoch": 0.88, "grad_norm": 1.3911253362447509, "learning_rate": 3.781270116686392e-07, "loss": 0.6208, "step": 13735 }, { "epoch": 0.88, "grad_norm": 1.6466636520156581, "learning_rate": 3.7773167950091616e-07, "loss": 0.5791, "step": 13736 }, { "epoch": 0.88, "grad_norm": 2.7094608582324295, "learning_rate": 3.7733654598857303e-07, "loss": 0.7014, "step": 13737 }, { "epoch": 0.88, "grad_norm": 2.189333735768502, "learning_rate": 3.769416111485913e-07, "loss": 0.6631, "step": 13738 }, { "epoch": 0.88, "grad_norm": 1.330124381020821, "learning_rate": 3.7654687499794453e-07, "loss": 0.7459, "step": 13739 }, { "epoch": 0.88, "grad_norm": 1.636080139140601, "learning_rate": 3.761523375535975e-07, "loss": 0.5963, "step": 13740 }, { "epoch": 0.88, "grad_norm": 1.4414972579102172, "learning_rate": 3.757579988325083e-07, "loss": 0.5821, "step": 13741 }, { "epoch": 0.88, "grad_norm": 1.5247578792068748, "learning_rate": 3.7536385885162176e-07, "loss": 0.6395, "step": 13742 }, { "epoch": 0.88, "grad_norm": 1.8174987299153336, "learning_rate": 3.7496991762788095e-07, "loss": 0.6963, "step": 13743 }, { "epoch": 0.88, "grad_norm": 1.7424478184490653, "learning_rate": 3.745761751782123e-07, "loss": 0.6839, "step": 13744 }, { "epoch": 0.88, "grad_norm": 1.7718193342742563, "learning_rate": 3.7418263151954184e-07, "loss": 0.7042, "step": 13745 }, { "epoch": 0.88, "grad_norm": 1.4740427601141965, "learning_rate": 3.737892866687809e-07, "loss": 0.6621, "step": 13746 }, { "epoch": 0.88, "grad_norm": 1.614547984154821, "learning_rate": 3.7339614064283545e-07, "loss": 0.649, "step": 13747 }, { "epoch": 0.88, "grad_norm": 1.8106979097792901, "learning_rate": 3.730031934586031e-07, "loss": 0.5939, "step": 13748 }, { "epoch": 0.88, "grad_norm": 1.7122410204208105, "learning_rate": 3.7261044513296927e-07, "loss": 0.7259, "step": 13749 }, { "epoch": 0.88, "grad_norm": 2.002930229558526, "learning_rate": 3.722178956828165e-07, "loss": 0.7108, "step": 13750 }, { "epoch": 0.88, "grad_norm": 1.7461037835090807, "learning_rate": 3.71825545125013e-07, "loss": 0.6575, "step": 13751 }, { "epoch": 0.88, "grad_norm": 1.1682379814115387, "learning_rate": 3.7143339347642306e-07, "loss": 0.651, "step": 13752 }, { "epoch": 0.88, "grad_norm": 1.59254319158549, "learning_rate": 3.710414407538998e-07, "loss": 0.69, "step": 13753 }, { "epoch": 0.88, "grad_norm": 1.7728749048741632, "learning_rate": 3.706496869742887e-07, "loss": 0.6917, "step": 13754 }, { "epoch": 0.88, "grad_norm": 2.323193534534725, "learning_rate": 3.7025813215442685e-07, "loss": 0.6149, "step": 13755 }, { "epoch": 0.88, "grad_norm": 1.6239420751440001, "learning_rate": 3.6986677631114074e-07, "loss": 0.643, "step": 13756 }, { "epoch": 0.88, "grad_norm": 1.6957482067544858, "learning_rate": 3.694756194612531e-07, "loss": 0.6587, "step": 13757 }, { "epoch": 0.88, "grad_norm": 1.3732092965201312, "learning_rate": 3.690846616215715e-07, "loss": 0.6932, "step": 13758 }, { "epoch": 0.88, "grad_norm": 1.5238687510355005, "learning_rate": 3.686939028089015e-07, "loss": 0.6169, "step": 13759 }, { "epoch": 0.88, "grad_norm": 1.3498998272234977, "learning_rate": 3.683033430400357e-07, "loss": 0.673, "step": 13760 }, { "epoch": 0.88, "grad_norm": 1.5522087242538567, "learning_rate": 3.6791298233175955e-07, "loss": 0.6663, "step": 13761 }, { "epoch": 0.88, "grad_norm": 1.8700550330746755, "learning_rate": 3.675228207008497e-07, "loss": 0.7509, "step": 13762 }, { "epoch": 0.88, "grad_norm": 1.6038798584346041, "learning_rate": 3.671328581640754e-07, "loss": 0.6377, "step": 13763 }, { "epoch": 0.88, "grad_norm": 1.64415714475641, "learning_rate": 3.667430947381956e-07, "loss": 0.5881, "step": 13764 }, { "epoch": 0.88, "grad_norm": 1.1286677429119134, "learning_rate": 3.663535304399607e-07, "loss": 0.6557, "step": 13765 }, { "epoch": 0.88, "grad_norm": 1.5352961653597548, "learning_rate": 3.6596416528611566e-07, "loss": 0.6282, "step": 13766 }, { "epoch": 0.88, "grad_norm": 1.703793441465349, "learning_rate": 3.6557499929339325e-07, "loss": 0.6815, "step": 13767 }, { "epoch": 0.88, "grad_norm": 1.6268798696769464, "learning_rate": 3.6518603247851947e-07, "loss": 0.6774, "step": 13768 }, { "epoch": 0.88, "grad_norm": 1.5134212510547198, "learning_rate": 3.647972648582104e-07, "loss": 0.6956, "step": 13769 }, { "epoch": 0.88, "grad_norm": 1.376222441671742, "learning_rate": 3.64408696449175e-07, "loss": 0.6098, "step": 13770 }, { "epoch": 0.88, "grad_norm": 1.462203304351829, "learning_rate": 3.640203272681131e-07, "loss": 0.6533, "step": 13771 }, { "epoch": 0.88, "grad_norm": 1.7074955734701036, "learning_rate": 3.6363215733171644e-07, "loss": 0.7996, "step": 13772 }, { "epoch": 0.88, "grad_norm": 1.7221289124590815, "learning_rate": 3.6324418665666717e-07, "loss": 0.6746, "step": 13773 }, { "epoch": 0.88, "grad_norm": 1.6861240233988315, "learning_rate": 3.628564152596381e-07, "loss": 0.7038, "step": 13774 }, { "epoch": 0.88, "grad_norm": 1.3014264255032728, "learning_rate": 3.624688431572981e-07, "loss": 0.5638, "step": 13775 }, { "epoch": 0.88, "grad_norm": 1.6116100103930489, "learning_rate": 3.620814703663017e-07, "loss": 0.6355, "step": 13776 }, { "epoch": 0.88, "grad_norm": 1.5250617415504886, "learning_rate": 3.6169429690329825e-07, "loss": 0.5696, "step": 13777 }, { "epoch": 0.88, "grad_norm": 1.3969537047105844, "learning_rate": 3.613073227849279e-07, "loss": 0.6127, "step": 13778 }, { "epoch": 0.88, "grad_norm": 1.5531138850753645, "learning_rate": 3.6092054802782107e-07, "loss": 0.6428, "step": 13779 }, { "epoch": 0.88, "grad_norm": 1.5133275401851678, "learning_rate": 3.6053397264860126e-07, "loss": 0.6521, "step": 13780 }, { "epoch": 0.88, "grad_norm": 1.4631966537680519, "learning_rate": 3.601475966638829e-07, "loss": 0.5535, "step": 13781 }, { "epoch": 0.88, "grad_norm": 1.693232424951378, "learning_rate": 3.5976142009026936e-07, "loss": 0.6219, "step": 13782 }, { "epoch": 0.88, "grad_norm": 1.6669105685156766, "learning_rate": 3.5937544294436134e-07, "loss": 0.6613, "step": 13783 }, { "epoch": 0.88, "grad_norm": 1.4835806196400476, "learning_rate": 3.5898966524274494e-07, "loss": 0.5875, "step": 13784 }, { "epoch": 0.88, "grad_norm": 1.5875579669377984, "learning_rate": 3.586040870020008e-07, "loss": 0.7092, "step": 13785 }, { "epoch": 0.88, "grad_norm": 1.5734704634044494, "learning_rate": 3.582187082386995e-07, "loss": 0.7387, "step": 13786 }, { "epoch": 0.88, "grad_norm": 1.7406845240605004, "learning_rate": 3.5783352896940513e-07, "loss": 0.7185, "step": 13787 }, { "epoch": 0.88, "grad_norm": 1.497368180871677, "learning_rate": 3.5744854921067053e-07, "loss": 0.6168, "step": 13788 }, { "epoch": 0.88, "grad_norm": 1.5716853500514503, "learning_rate": 3.570637689790418e-07, "loss": 0.6553, "step": 13789 }, { "epoch": 0.88, "grad_norm": 2.063249124476478, "learning_rate": 3.566791882910575e-07, "loss": 0.7234, "step": 13790 }, { "epoch": 0.88, "grad_norm": 1.5548807485238318, "learning_rate": 3.562948071632427e-07, "loss": 0.6354, "step": 13791 }, { "epoch": 0.88, "grad_norm": 1.5752236674601288, "learning_rate": 3.5591062561212086e-07, "loss": 0.6714, "step": 13792 }, { "epoch": 0.88, "grad_norm": 1.83071964413355, "learning_rate": 3.55526643654201e-07, "loss": 0.6902, "step": 13793 }, { "epoch": 0.88, "grad_norm": 1.5550401541587489, "learning_rate": 3.551428613059871e-07, "loss": 0.8182, "step": 13794 }, { "epoch": 0.88, "grad_norm": 1.7118573595918543, "learning_rate": 3.5475927858397263e-07, "loss": 0.7328, "step": 13795 }, { "epoch": 0.88, "grad_norm": 1.5147534702005039, "learning_rate": 3.543758955046428e-07, "loss": 0.7169, "step": 13796 }, { "epoch": 0.88, "grad_norm": 1.699211911982497, "learning_rate": 3.539927120844766e-07, "loss": 0.6499, "step": 13797 }, { "epoch": 0.88, "grad_norm": 3.314088117986209, "learning_rate": 3.536097283399392e-07, "loss": 0.6338, "step": 13798 }, { "epoch": 0.88, "grad_norm": 1.1153359868629527, "learning_rate": 3.532269442874942e-07, "loss": 0.7983, "step": 13799 }, { "epoch": 0.88, "grad_norm": 1.6012787841175877, "learning_rate": 3.5284435994358937e-07, "loss": 0.6767, "step": 13800 }, { "epoch": 0.88, "grad_norm": 1.4890673899114772, "learning_rate": 3.524619753246694e-07, "loss": 0.5864, "step": 13801 }, { "epoch": 0.88, "grad_norm": 1.8230241187789058, "learning_rate": 3.520797904471679e-07, "loss": 0.7313, "step": 13802 }, { "epoch": 0.88, "grad_norm": 2.1653035089580883, "learning_rate": 3.516978053275111e-07, "loss": 0.6195, "step": 13803 }, { "epoch": 0.88, "grad_norm": 1.534633074376608, "learning_rate": 3.5131601998211417e-07, "loss": 0.6249, "step": 13804 }, { "epoch": 0.88, "grad_norm": 1.155845721349049, "learning_rate": 3.5093443442738627e-07, "loss": 0.613, "step": 13805 }, { "epoch": 0.88, "grad_norm": 1.0840961575358816, "learning_rate": 3.505530486797287e-07, "loss": 0.5909, "step": 13806 }, { "epoch": 0.88, "grad_norm": 1.5197758922829794, "learning_rate": 3.5017186275553004e-07, "loss": 0.5326, "step": 13807 }, { "epoch": 0.88, "grad_norm": 1.6637730429663662, "learning_rate": 3.4979087667117494e-07, "loss": 0.6879, "step": 13808 }, { "epoch": 0.88, "grad_norm": 1.1296669230895995, "learning_rate": 3.494100904430359e-07, "loss": 0.6495, "step": 13809 }, { "epoch": 0.88, "grad_norm": 2.0757946229042243, "learning_rate": 3.490295040874792e-07, "loss": 0.7105, "step": 13810 }, { "epoch": 0.88, "grad_norm": 1.4765452456529442, "learning_rate": 3.486491176208617e-07, "loss": 0.6712, "step": 13811 }, { "epoch": 0.88, "grad_norm": 1.8243130517989266, "learning_rate": 3.4826893105952997e-07, "loss": 0.723, "step": 13812 }, { "epoch": 0.88, "grad_norm": 1.1950783675906007, "learning_rate": 3.478889444198269e-07, "loss": 0.6715, "step": 13813 }, { "epoch": 0.88, "grad_norm": 1.6844156146268872, "learning_rate": 3.4750915771808005e-07, "loss": 0.7483, "step": 13814 }, { "epoch": 0.88, "grad_norm": 1.8271355078118308, "learning_rate": 3.471295709706146e-07, "loss": 0.6714, "step": 13815 }, { "epoch": 0.88, "grad_norm": 1.6042020455065786, "learning_rate": 3.4675018419374204e-07, "loss": 0.6161, "step": 13816 }, { "epoch": 0.88, "grad_norm": 1.6585947830684382, "learning_rate": 3.4637099740376934e-07, "loss": 0.6896, "step": 13817 }, { "epoch": 0.88, "grad_norm": 1.766705802956921, "learning_rate": 3.459920106169923e-07, "loss": 0.7778, "step": 13818 }, { "epoch": 0.88, "grad_norm": 1.5130137993438868, "learning_rate": 3.4561322384969843e-07, "loss": 0.612, "step": 13819 }, { "epoch": 0.88, "grad_norm": 1.6757090295258892, "learning_rate": 3.4523463711816974e-07, "loss": 0.5593, "step": 13820 }, { "epoch": 0.88, "grad_norm": 1.5823513363385928, "learning_rate": 3.448562504386738e-07, "loss": 0.7009, "step": 13821 }, { "epoch": 0.88, "grad_norm": 1.5198586103212381, "learning_rate": 3.4447806382747584e-07, "loss": 0.6379, "step": 13822 }, { "epoch": 0.88, "grad_norm": 1.4235081532206229, "learning_rate": 3.4410007730082685e-07, "loss": 0.6311, "step": 13823 }, { "epoch": 0.88, "grad_norm": 1.710552805474682, "learning_rate": 3.4372229087497376e-07, "loss": 0.7309, "step": 13824 }, { "epoch": 0.88, "grad_norm": 2.027988245844009, "learning_rate": 3.4334470456615255e-07, "loss": 0.693, "step": 13825 }, { "epoch": 0.88, "grad_norm": 1.503940014344118, "learning_rate": 3.4296731839059073e-07, "loss": 0.6151, "step": 13826 }, { "epoch": 0.89, "grad_norm": 1.187955732084038, "learning_rate": 3.4259013236450755e-07, "loss": 0.7296, "step": 13827 }, { "epoch": 0.89, "grad_norm": 1.7597533703381418, "learning_rate": 3.4221314650411454e-07, "loss": 0.6645, "step": 13828 }, { "epoch": 0.89, "grad_norm": 1.6095033650056128, "learning_rate": 3.4183636082561257e-07, "loss": 0.6469, "step": 13829 }, { "epoch": 0.89, "grad_norm": 1.613288150614467, "learning_rate": 3.414597753451954e-07, "loss": 0.6791, "step": 13830 }, { "epoch": 0.89, "grad_norm": 1.7617274895019361, "learning_rate": 3.4108339007904834e-07, "loss": 0.655, "step": 13831 }, { "epoch": 0.89, "grad_norm": 1.6274937929730562, "learning_rate": 3.40707205043348e-07, "loss": 0.6866, "step": 13832 }, { "epoch": 0.89, "grad_norm": 1.2311849164509479, "learning_rate": 3.4033122025426077e-07, "loss": 0.6491, "step": 13833 }, { "epoch": 0.89, "grad_norm": 1.6972349366792603, "learning_rate": 3.399554357279472e-07, "loss": 0.6797, "step": 13834 }, { "epoch": 0.89, "grad_norm": 1.3878562107154444, "learning_rate": 3.395798514805565e-07, "loss": 0.6611, "step": 13835 }, { "epoch": 0.89, "grad_norm": 1.738810141507811, "learning_rate": 3.392044675282308e-07, "loss": 0.6743, "step": 13836 }, { "epoch": 0.89, "grad_norm": 1.5668082138946544, "learning_rate": 3.3882928388710376e-07, "loss": 0.5851, "step": 13837 }, { "epoch": 0.89, "grad_norm": 1.7255276366616994, "learning_rate": 3.384543005732982e-07, "loss": 0.6763, "step": 13838 }, { "epoch": 0.89, "grad_norm": 1.4976502940875283, "learning_rate": 3.380795176029328e-07, "loss": 0.7368, "step": 13839 }, { "epoch": 0.89, "grad_norm": 1.6162085755922853, "learning_rate": 3.3770493499211356e-07, "loss": 0.6629, "step": 13840 }, { "epoch": 0.89, "grad_norm": 1.6896127011329438, "learning_rate": 3.3733055275693983e-07, "loss": 0.7225, "step": 13841 }, { "epoch": 0.89, "grad_norm": 1.4281571850734631, "learning_rate": 3.369563709135004e-07, "loss": 0.5555, "step": 13842 }, { "epoch": 0.89, "grad_norm": 0.9454270300928266, "learning_rate": 3.3658238947787857e-07, "loss": 0.5742, "step": 13843 }, { "epoch": 0.89, "grad_norm": 1.4909210722546864, "learning_rate": 3.362086084661459e-07, "loss": 0.6694, "step": 13844 }, { "epoch": 0.89, "grad_norm": 1.4959268752426331, "learning_rate": 3.3583502789436783e-07, "loss": 0.551, "step": 13845 }, { "epoch": 0.89, "grad_norm": 1.5719616699882422, "learning_rate": 3.3546164777859936e-07, "loss": 0.661, "step": 13846 }, { "epoch": 0.89, "grad_norm": 1.5456877531273414, "learning_rate": 3.3508846813488647e-07, "loss": 0.6591, "step": 13847 }, { "epoch": 0.89, "grad_norm": 1.3300282508113763, "learning_rate": 3.3471548897926973e-07, "loss": 0.5831, "step": 13848 }, { "epoch": 0.89, "grad_norm": 1.535839528096611, "learning_rate": 3.3434271032777856e-07, "loss": 0.6405, "step": 13849 }, { "epoch": 0.89, "grad_norm": 1.531245198037866, "learning_rate": 3.3397013219643395e-07, "loss": 0.7097, "step": 13850 }, { "epoch": 0.89, "grad_norm": 1.645651373212113, "learning_rate": 3.335977546012481e-07, "loss": 0.7049, "step": 13851 }, { "epoch": 0.89, "grad_norm": 1.6681424389255481, "learning_rate": 3.332255775582249e-07, "loss": 0.7043, "step": 13852 }, { "epoch": 0.89, "grad_norm": 1.1760913275400455, "learning_rate": 3.328536010833605e-07, "loss": 0.6603, "step": 13853 }, { "epoch": 0.89, "grad_norm": 1.5412960828421927, "learning_rate": 3.3248182519264036e-07, "loss": 0.6799, "step": 13854 }, { "epoch": 0.89, "grad_norm": 6.18177124132281, "learning_rate": 3.3211024990204565e-07, "loss": 0.6465, "step": 13855 }, { "epoch": 0.89, "grad_norm": 1.6575118374468143, "learning_rate": 3.317388752275419e-07, "loss": 0.7604, "step": 13856 }, { "epoch": 0.89, "grad_norm": 2.2432938742533177, "learning_rate": 3.3136770118509243e-07, "loss": 0.7306, "step": 13857 }, { "epoch": 0.89, "grad_norm": 1.0785547837195992, "learning_rate": 3.309967277906495e-07, "loss": 0.7037, "step": 13858 }, { "epoch": 0.89, "grad_norm": 1.3559185677794694, "learning_rate": 3.3062595506015594e-07, "loss": 0.6566, "step": 13859 }, { "epoch": 0.89, "grad_norm": 1.6272517773792436, "learning_rate": 3.302553830095473e-07, "loss": 0.6715, "step": 13860 }, { "epoch": 0.89, "grad_norm": 1.1861003246560882, "learning_rate": 3.2988501165474864e-07, "loss": 0.5947, "step": 13861 }, { "epoch": 0.89, "grad_norm": 1.0732475837805706, "learning_rate": 3.295148410116811e-07, "loss": 0.6326, "step": 13862 }, { "epoch": 0.89, "grad_norm": 1.2192880533590564, "learning_rate": 3.291448710962497e-07, "loss": 0.6016, "step": 13863 }, { "epoch": 0.89, "grad_norm": 1.573290167841042, "learning_rate": 3.2877510192435845e-07, "loss": 0.6858, "step": 13864 }, { "epoch": 0.89, "grad_norm": 1.6467951420647016, "learning_rate": 3.284055335118963e-07, "loss": 0.7162, "step": 13865 }, { "epoch": 0.89, "grad_norm": 1.7311977830037584, "learning_rate": 3.2803616587474885e-07, "loss": 0.75, "step": 13866 }, { "epoch": 0.89, "grad_norm": 1.7107726774313168, "learning_rate": 3.276669990287895e-07, "loss": 0.672, "step": 13867 }, { "epoch": 0.89, "grad_norm": 2.2590470481425085, "learning_rate": 3.272980329898845e-07, "loss": 0.6174, "step": 13868 }, { "epoch": 0.89, "grad_norm": 1.375288530591467, "learning_rate": 3.269292677738922e-07, "loss": 0.5703, "step": 13869 }, { "epoch": 0.89, "grad_norm": 1.6649631877084985, "learning_rate": 3.265607033966595e-07, "loss": 0.6692, "step": 13870 }, { "epoch": 0.89, "grad_norm": 1.5624423296966954, "learning_rate": 3.261923398740285e-07, "loss": 0.6838, "step": 13871 }, { "epoch": 0.89, "grad_norm": 1.4994463839637906, "learning_rate": 3.2582417722182845e-07, "loss": 0.5773, "step": 13872 }, { "epoch": 0.89, "grad_norm": 1.6416704850783346, "learning_rate": 3.2545621545588434e-07, "loss": 0.692, "step": 13873 }, { "epoch": 0.89, "grad_norm": 1.431445738717618, "learning_rate": 3.250884545920091e-07, "loss": 0.6771, "step": 13874 }, { "epoch": 0.89, "grad_norm": 1.5802607351685214, "learning_rate": 3.2472089464600844e-07, "loss": 0.618, "step": 13875 }, { "epoch": 0.89, "grad_norm": 1.171748458247516, "learning_rate": 3.243535356336808e-07, "loss": 0.8057, "step": 13876 }, { "epoch": 0.89, "grad_norm": 1.6276610260724724, "learning_rate": 3.2398637757081187e-07, "loss": 0.6983, "step": 13877 }, { "epoch": 0.89, "grad_norm": 1.564232600017761, "learning_rate": 3.2361942047318463e-07, "loss": 0.6275, "step": 13878 }, { "epoch": 0.89, "grad_norm": 1.9698797742937735, "learning_rate": 3.232526643565664e-07, "loss": 0.6887, "step": 13879 }, { "epoch": 0.89, "grad_norm": 1.869529331059345, "learning_rate": 3.228861092367225e-07, "loss": 0.6049, "step": 13880 }, { "epoch": 0.89, "grad_norm": 1.10869120414787, "learning_rate": 3.2251975512940516e-07, "loss": 0.5857, "step": 13881 }, { "epoch": 0.89, "grad_norm": 1.8122244504048137, "learning_rate": 3.221536020503602e-07, "loss": 0.7732, "step": 13882 }, { "epoch": 0.89, "grad_norm": 1.5492485073192992, "learning_rate": 3.2178765001532386e-07, "loss": 0.6201, "step": 13883 }, { "epoch": 0.89, "grad_norm": 1.4761495426361027, "learning_rate": 3.214218990400236e-07, "loss": 0.6082, "step": 13884 }, { "epoch": 0.89, "grad_norm": 1.4274959467232298, "learning_rate": 3.210563491401808e-07, "loss": 0.6111, "step": 13885 }, { "epoch": 0.89, "grad_norm": 2.1597025284817253, "learning_rate": 3.2069100033150225e-07, "loss": 0.6538, "step": 13886 }, { "epoch": 0.89, "grad_norm": 1.6084457201214757, "learning_rate": 3.203258526296937e-07, "loss": 0.6263, "step": 13887 }, { "epoch": 0.89, "grad_norm": 2.112638035832462, "learning_rate": 3.1996090605044496e-07, "loss": 0.6595, "step": 13888 }, { "epoch": 0.89, "grad_norm": 1.6039075739240922, "learning_rate": 3.195961606094433e-07, "loss": 0.6729, "step": 13889 }, { "epoch": 0.89, "grad_norm": 1.0394314361520982, "learning_rate": 3.1923161632236355e-07, "loss": 0.6515, "step": 13890 }, { "epoch": 0.89, "grad_norm": 1.1136156928128582, "learning_rate": 3.188672732048731e-07, "loss": 0.5725, "step": 13891 }, { "epoch": 0.89, "grad_norm": 2.2783919173111515, "learning_rate": 3.185031312726311e-07, "loss": 0.6311, "step": 13892 }, { "epoch": 0.89, "grad_norm": 1.446439926297454, "learning_rate": 3.181391905412867e-07, "loss": 0.5671, "step": 13893 }, { "epoch": 0.89, "grad_norm": 1.547040381916417, "learning_rate": 3.1777545102648354e-07, "loss": 0.6459, "step": 13894 }, { "epoch": 0.89, "grad_norm": 1.4675297561913232, "learning_rate": 3.1741191274385076e-07, "loss": 0.6654, "step": 13895 }, { "epoch": 0.89, "grad_norm": 1.1169973404990265, "learning_rate": 3.170485757090158e-07, "loss": 0.7102, "step": 13896 }, { "epoch": 0.89, "grad_norm": 1.2665400433994525, "learning_rate": 3.1668543993759293e-07, "loss": 0.6597, "step": 13897 }, { "epoch": 0.89, "grad_norm": 1.6717614513302557, "learning_rate": 3.163225054451885e-07, "loss": 0.77, "step": 13898 }, { "epoch": 0.89, "grad_norm": 1.5012311923592334, "learning_rate": 3.159597722474006e-07, "loss": 0.7038, "step": 13899 }, { "epoch": 0.89, "grad_norm": 1.6160803975767355, "learning_rate": 3.155972403598201e-07, "loss": 0.6869, "step": 13900 }, { "epoch": 0.89, "grad_norm": 1.5552759706451673, "learning_rate": 3.152349097980262e-07, "loss": 0.7011, "step": 13901 }, { "epoch": 0.89, "grad_norm": 1.5922140484358254, "learning_rate": 3.1487278057759196e-07, "loss": 0.6542, "step": 13902 }, { "epoch": 0.89, "grad_norm": 1.6238153780199849, "learning_rate": 3.1451085271408053e-07, "loss": 0.7162, "step": 13903 }, { "epoch": 0.89, "grad_norm": 1.1910962686326911, "learning_rate": 3.141491262230473e-07, "loss": 0.6563, "step": 13904 }, { "epoch": 0.89, "grad_norm": 1.262809331675104, "learning_rate": 3.137876011200386e-07, "loss": 0.6307, "step": 13905 }, { "epoch": 0.89, "grad_norm": 1.5584256677553059, "learning_rate": 3.134262774205915e-07, "loss": 0.6492, "step": 13906 }, { "epoch": 0.89, "grad_norm": 1.6110712895835577, "learning_rate": 3.130651551402353e-07, "loss": 0.6334, "step": 13907 }, { "epoch": 0.89, "grad_norm": 1.565706790187137, "learning_rate": 3.127042342944897e-07, "loss": 0.6472, "step": 13908 }, { "epoch": 0.89, "grad_norm": 1.4800262332777687, "learning_rate": 3.123435148988674e-07, "loss": 0.6453, "step": 13909 }, { "epoch": 0.89, "grad_norm": 1.5591969345537933, "learning_rate": 3.1198299696886925e-07, "loss": 0.7549, "step": 13910 }, { "epoch": 0.89, "grad_norm": 1.594354732341683, "learning_rate": 3.116226805199929e-07, "loss": 0.7254, "step": 13911 }, { "epoch": 0.89, "grad_norm": 1.6613905529389164, "learning_rate": 3.1126256556772096e-07, "loss": 0.6568, "step": 13912 }, { "epoch": 0.89, "grad_norm": 1.5510942159367462, "learning_rate": 3.1090265212753214e-07, "loss": 0.6152, "step": 13913 }, { "epoch": 0.89, "grad_norm": 1.8324526211844725, "learning_rate": 3.1054294021489353e-07, "loss": 0.7265, "step": 13914 }, { "epoch": 0.89, "grad_norm": 1.7399070760519846, "learning_rate": 3.101834298452661e-07, "loss": 0.6206, "step": 13915 }, { "epoch": 0.89, "grad_norm": 1.3866409280116643, "learning_rate": 3.098241210341002e-07, "loss": 0.5878, "step": 13916 }, { "epoch": 0.89, "grad_norm": 1.462025627213554, "learning_rate": 3.094650137968369e-07, "loss": 0.6136, "step": 13917 }, { "epoch": 0.89, "grad_norm": 1.52930858898361, "learning_rate": 3.0910610814891327e-07, "loss": 0.6723, "step": 13918 }, { "epoch": 0.89, "grad_norm": 1.2991095653491773, "learning_rate": 3.0874740410575033e-07, "loss": 0.6279, "step": 13919 }, { "epoch": 0.89, "grad_norm": 1.6614441580599582, "learning_rate": 3.083889016827679e-07, "loss": 0.7511, "step": 13920 }, { "epoch": 0.89, "grad_norm": 1.5477632867092335, "learning_rate": 3.080306008953704e-07, "loss": 0.614, "step": 13921 }, { "epoch": 0.89, "grad_norm": 1.642770625751112, "learning_rate": 3.0767250175895933e-07, "loss": 0.6878, "step": 13922 }, { "epoch": 0.89, "grad_norm": 1.5396325474886707, "learning_rate": 3.0731460428892414e-07, "loss": 0.6556, "step": 13923 }, { "epoch": 0.89, "grad_norm": 1.2481457339757558, "learning_rate": 3.069569085006463e-07, "loss": 0.6548, "step": 13924 }, { "epoch": 0.89, "grad_norm": 1.837751939132481, "learning_rate": 3.065994144094997e-07, "loss": 0.7314, "step": 13925 }, { "epoch": 0.89, "grad_norm": 1.4756554727408742, "learning_rate": 3.06242122030847e-07, "loss": 0.6837, "step": 13926 }, { "epoch": 0.89, "grad_norm": 1.7100623745270047, "learning_rate": 3.0588503138004597e-07, "loss": 0.7484, "step": 13927 }, { "epoch": 0.89, "grad_norm": 1.5795987090811106, "learning_rate": 3.055281424724421e-07, "loss": 0.6879, "step": 13928 }, { "epoch": 0.89, "grad_norm": 1.1831475874704773, "learning_rate": 3.051714553233748e-07, "loss": 0.6489, "step": 13929 }, { "epoch": 0.89, "grad_norm": 1.684941701959894, "learning_rate": 3.048149699481723e-07, "loss": 0.7851, "step": 13930 }, { "epoch": 0.89, "grad_norm": 0.9805510104853838, "learning_rate": 3.044586863621568e-07, "loss": 0.6184, "step": 13931 }, { "epoch": 0.89, "grad_norm": 1.738699310114388, "learning_rate": 3.0410260458064056e-07, "loss": 0.862, "step": 13932 }, { "epoch": 0.89, "grad_norm": 1.7133768202440272, "learning_rate": 3.0374672461892574e-07, "loss": 0.6473, "step": 13933 }, { "epoch": 0.89, "grad_norm": 1.7838099443551845, "learning_rate": 3.0339104649231064e-07, "loss": 0.6319, "step": 13934 }, { "epoch": 0.89, "grad_norm": 1.4631941053357118, "learning_rate": 3.0303557021607754e-07, "loss": 0.6606, "step": 13935 }, { "epoch": 0.89, "grad_norm": 1.254982417037709, "learning_rate": 3.026802958055075e-07, "loss": 0.6549, "step": 13936 }, { "epoch": 0.89, "grad_norm": 1.1368920511531957, "learning_rate": 3.023252232758667e-07, "loss": 0.7079, "step": 13937 }, { "epoch": 0.89, "grad_norm": 1.5076687135968674, "learning_rate": 3.019703526424167e-07, "loss": 0.6089, "step": 13938 }, { "epoch": 0.89, "grad_norm": 1.1764592902959303, "learning_rate": 3.0161568392040986e-07, "loss": 0.6478, "step": 13939 }, { "epoch": 0.89, "grad_norm": 1.6641247894651834, "learning_rate": 3.012612171250867e-07, "loss": 0.8148, "step": 13940 }, { "epoch": 0.89, "grad_norm": 1.769708377426869, "learning_rate": 3.009069522716851e-07, "loss": 0.6511, "step": 13941 }, { "epoch": 0.89, "grad_norm": 0.9606973671194305, "learning_rate": 3.005528893754267e-07, "loss": 0.6451, "step": 13942 }, { "epoch": 0.89, "grad_norm": 1.7282348415157718, "learning_rate": 3.0019902845153216e-07, "loss": 0.6327, "step": 13943 }, { "epoch": 0.89, "grad_norm": 1.524891527471068, "learning_rate": 2.9984536951520595e-07, "loss": 0.6527, "step": 13944 }, { "epoch": 0.89, "grad_norm": 1.7006777948560448, "learning_rate": 2.994919125816498e-07, "loss": 0.7026, "step": 13945 }, { "epoch": 0.89, "grad_norm": 1.628254905735132, "learning_rate": 2.991386576660543e-07, "loss": 0.5847, "step": 13946 }, { "epoch": 0.89, "grad_norm": 2.028342501006796, "learning_rate": 2.9878560478360186e-07, "loss": 0.7409, "step": 13947 }, { "epoch": 0.89, "grad_norm": 1.672949422333962, "learning_rate": 2.9843275394946526e-07, "loss": 0.701, "step": 13948 }, { "epoch": 0.89, "grad_norm": 1.2406037686891593, "learning_rate": 2.980801051788085e-07, "loss": 0.7043, "step": 13949 }, { "epoch": 0.89, "grad_norm": 1.7269487105796808, "learning_rate": 2.977276584867905e-07, "loss": 0.6983, "step": 13950 }, { "epoch": 0.89, "grad_norm": 1.5737680888575318, "learning_rate": 2.973754138885554e-07, "loss": 0.5573, "step": 13951 }, { "epoch": 0.89, "grad_norm": 1.5007008853281694, "learning_rate": 2.970233713992443e-07, "loss": 0.7085, "step": 13952 }, { "epoch": 0.89, "grad_norm": 1.7470805302781203, "learning_rate": 2.9667153103398573e-07, "loss": 0.6101, "step": 13953 }, { "epoch": 0.89, "grad_norm": 1.6071944986959061, "learning_rate": 2.9631989280790254e-07, "loss": 0.7272, "step": 13954 }, { "epoch": 0.89, "grad_norm": 1.1232949406750488, "learning_rate": 2.9596845673610597e-07, "loss": 0.6524, "step": 13955 }, { "epoch": 0.89, "grad_norm": 1.871658444595365, "learning_rate": 2.956172228337012e-07, "loss": 0.7314, "step": 13956 }, { "epoch": 0.89, "grad_norm": 1.768192002023208, "learning_rate": 2.9526619111578223e-07, "loss": 0.6708, "step": 13957 }, { "epoch": 0.89, "grad_norm": 1.6923187075591148, "learning_rate": 2.9491536159743595e-07, "loss": 0.6923, "step": 13958 }, { "epoch": 0.89, "grad_norm": 1.7507205062149183, "learning_rate": 2.945647342937413e-07, "loss": 0.704, "step": 13959 }, { "epoch": 0.89, "grad_norm": 1.672929983773355, "learning_rate": 2.9421430921976746e-07, "loss": 0.6789, "step": 13960 }, { "epoch": 0.89, "grad_norm": 1.5066530387632373, "learning_rate": 2.9386408639057394e-07, "loss": 0.6647, "step": 13961 }, { "epoch": 0.89, "grad_norm": 1.5804114243073222, "learning_rate": 2.9351406582121264e-07, "loss": 0.6234, "step": 13962 }, { "epoch": 0.89, "grad_norm": 1.6115644194362406, "learning_rate": 2.9316424752672766e-07, "loss": 0.7053, "step": 13963 }, { "epoch": 0.89, "grad_norm": 1.5278521531644236, "learning_rate": 2.9281463152215304e-07, "loss": 0.6993, "step": 13964 }, { "epoch": 0.89, "grad_norm": 1.4981770838775765, "learning_rate": 2.9246521782251403e-07, "loss": 0.6986, "step": 13965 }, { "epoch": 0.89, "grad_norm": 1.1187443424588963, "learning_rate": 2.92116006442828e-07, "loss": 0.6515, "step": 13966 }, { "epoch": 0.89, "grad_norm": 1.592094805089963, "learning_rate": 2.91766997398103e-07, "loss": 0.6257, "step": 13967 }, { "epoch": 0.89, "grad_norm": 1.663275469871892, "learning_rate": 2.914181907033392e-07, "loss": 0.6903, "step": 13968 }, { "epoch": 0.89, "grad_norm": 1.4108697235455627, "learning_rate": 2.91069586373528e-07, "loss": 0.6189, "step": 13969 }, { "epoch": 0.89, "grad_norm": 1.5218811494997255, "learning_rate": 2.9072118442365126e-07, "loss": 0.6431, "step": 13970 }, { "epoch": 0.89, "grad_norm": 1.9276569170306828, "learning_rate": 2.9037298486868205e-07, "loss": 0.6932, "step": 13971 }, { "epoch": 0.89, "grad_norm": 1.691577609369676, "learning_rate": 2.9002498772358556e-07, "loss": 0.6758, "step": 13972 }, { "epoch": 0.89, "grad_norm": 1.506191039072478, "learning_rate": 2.8967719300331875e-07, "loss": 0.5577, "step": 13973 }, { "epoch": 0.89, "grad_norm": 1.5695042553529448, "learning_rate": 2.89329600722828e-07, "loss": 0.7519, "step": 13974 }, { "epoch": 0.89, "grad_norm": 1.3698426661600034, "learning_rate": 2.8898221089705194e-07, "loss": 0.6838, "step": 13975 }, { "epoch": 0.89, "grad_norm": 1.6281372327633892, "learning_rate": 2.886350235409224e-07, "loss": 0.6808, "step": 13976 }, { "epoch": 0.89, "grad_norm": 1.6816490671505593, "learning_rate": 2.882880386693582e-07, "loss": 0.6948, "step": 13977 }, { "epoch": 0.89, "grad_norm": 1.799849793570299, "learning_rate": 2.8794125629727444e-07, "loss": 0.679, "step": 13978 }, { "epoch": 0.89, "grad_norm": 1.5595345016279814, "learning_rate": 2.8759467643957375e-07, "loss": 0.6747, "step": 13979 }, { "epoch": 0.89, "grad_norm": 1.6010723322514506, "learning_rate": 2.872482991111519e-07, "loss": 0.6733, "step": 13980 }, { "epoch": 0.89, "grad_norm": 1.5502369195358974, "learning_rate": 2.8690212432689546e-07, "loss": 0.6737, "step": 13981 }, { "epoch": 0.89, "grad_norm": 1.7692726847493865, "learning_rate": 2.865561521016813e-07, "loss": 0.6543, "step": 13982 }, { "epoch": 0.89, "grad_norm": 1.8946272651849383, "learning_rate": 2.862103824503809e-07, "loss": 0.6336, "step": 13983 }, { "epoch": 0.9, "grad_norm": 0.9359300416512336, "learning_rate": 2.858648153878518e-07, "loss": 0.6164, "step": 13984 }, { "epoch": 0.9, "grad_norm": 1.5282700675096825, "learning_rate": 2.855194509289483e-07, "loss": 0.7212, "step": 13985 }, { "epoch": 0.9, "grad_norm": 1.679716850435142, "learning_rate": 2.851742890885112e-07, "loss": 0.69, "step": 13986 }, { "epoch": 0.9, "grad_norm": 1.6345309965594839, "learning_rate": 2.8482932988137647e-07, "loss": 0.5913, "step": 13987 }, { "epoch": 0.9, "grad_norm": 1.9218567552068877, "learning_rate": 2.8448457332236945e-07, "loss": 0.6263, "step": 13988 }, { "epoch": 0.9, "grad_norm": 1.6037297162353634, "learning_rate": 2.8414001942630556e-07, "loss": 0.6731, "step": 13989 }, { "epoch": 0.9, "grad_norm": 1.5441477798325693, "learning_rate": 2.837956682079962e-07, "loss": 0.741, "step": 13990 }, { "epoch": 0.9, "grad_norm": 1.506858717416728, "learning_rate": 2.834515196822374e-07, "loss": 0.6826, "step": 13991 }, { "epoch": 0.9, "grad_norm": 1.594202857587882, "learning_rate": 2.831075738638228e-07, "loss": 0.6473, "step": 13992 }, { "epoch": 0.9, "grad_norm": 1.4773744409943705, "learning_rate": 2.8276383076753175e-07, "loss": 0.6906, "step": 13993 }, { "epoch": 0.9, "grad_norm": 1.5345843578837606, "learning_rate": 2.8242029040813965e-07, "loss": 0.639, "step": 13994 }, { "epoch": 0.9, "grad_norm": 1.5424818466392725, "learning_rate": 2.8207695280041025e-07, "loss": 0.572, "step": 13995 }, { "epoch": 0.9, "grad_norm": 1.487991849810935, "learning_rate": 2.8173381795910006e-07, "loss": 0.6282, "step": 13996 }, { "epoch": 0.9, "grad_norm": 1.585972955818606, "learning_rate": 2.813908858989556e-07, "loss": 0.6626, "step": 13997 }, { "epoch": 0.9, "grad_norm": 1.6549634552100692, "learning_rate": 2.8104815663471506e-07, "loss": 0.7751, "step": 13998 }, { "epoch": 0.9, "grad_norm": 1.6457800291833165, "learning_rate": 2.8070563018111063e-07, "loss": 0.6329, "step": 13999 }, { "epoch": 0.9, "grad_norm": 1.808770302777001, "learning_rate": 2.803633065528599e-07, "loss": 0.678, "step": 14000 }, { "epoch": 0.9, "grad_norm": 1.5430410749006112, "learning_rate": 2.8002118576467784e-07, "loss": 0.7065, "step": 14001 }, { "epoch": 0.9, "grad_norm": 1.5281876756589456, "learning_rate": 2.796792678312671e-07, "loss": 0.6401, "step": 14002 }, { "epoch": 0.9, "grad_norm": 2.192624358280187, "learning_rate": 2.7933755276732257e-07, "loss": 0.6921, "step": 14003 }, { "epoch": 0.9, "grad_norm": 1.7332091296305172, "learning_rate": 2.7899604058753025e-07, "loss": 0.5578, "step": 14004 }, { "epoch": 0.9, "grad_norm": 1.5106129910721913, "learning_rate": 2.7865473130656794e-07, "loss": 0.6813, "step": 14005 }, { "epoch": 0.9, "grad_norm": 1.5274309929142504, "learning_rate": 2.78313624939105e-07, "loss": 0.6589, "step": 14006 }, { "epoch": 0.9, "grad_norm": 1.7710147303212993, "learning_rate": 2.779727214997996e-07, "loss": 0.6736, "step": 14007 }, { "epoch": 0.9, "grad_norm": 1.9063093694806246, "learning_rate": 2.7763202100330624e-07, "loss": 0.6732, "step": 14008 }, { "epoch": 0.9, "grad_norm": 1.0615498981985398, "learning_rate": 2.7729152346426366e-07, "loss": 0.6874, "step": 14009 }, { "epoch": 0.9, "grad_norm": 6.466547542412566, "learning_rate": 2.7695122889730865e-07, "loss": 0.6082, "step": 14010 }, { "epoch": 0.9, "grad_norm": 1.6420966036831024, "learning_rate": 2.766111373170649e-07, "loss": 0.6733, "step": 14011 }, { "epoch": 0.9, "grad_norm": 1.3288725496732687, "learning_rate": 2.762712487381497e-07, "loss": 0.5172, "step": 14012 }, { "epoch": 0.9, "grad_norm": 1.6737379171627689, "learning_rate": 2.7593156317516966e-07, "loss": 0.6948, "step": 14013 }, { "epoch": 0.9, "grad_norm": 1.0090432924178028, "learning_rate": 2.7559208064272423e-07, "loss": 0.5847, "step": 14014 }, { "epoch": 0.9, "grad_norm": 1.6318922330549352, "learning_rate": 2.752528011554051e-07, "loss": 0.7193, "step": 14015 }, { "epoch": 0.9, "grad_norm": 2.0362777340147176, "learning_rate": 2.749137247277911e-07, "loss": 0.5927, "step": 14016 }, { "epoch": 0.9, "grad_norm": 1.70118984405734, "learning_rate": 2.7457485137445725e-07, "loss": 0.6754, "step": 14017 }, { "epoch": 0.9, "grad_norm": 2.8815264698240406, "learning_rate": 2.7423618110996697e-07, "loss": 0.7227, "step": 14018 }, { "epoch": 0.9, "grad_norm": 1.8150463651142599, "learning_rate": 2.738977139488752e-07, "loss": 0.6709, "step": 14019 }, { "epoch": 0.9, "grad_norm": 1.4621639215842628, "learning_rate": 2.735594499057287e-07, "loss": 0.6349, "step": 14020 }, { "epoch": 0.9, "grad_norm": 2.466467621753063, "learning_rate": 2.732213889950652e-07, "loss": 0.6995, "step": 14021 }, { "epoch": 0.9, "grad_norm": 2.1424854292977624, "learning_rate": 2.728835312314143e-07, "loss": 0.6856, "step": 14022 }, { "epoch": 0.9, "grad_norm": 1.6162637739996755, "learning_rate": 2.725458766292954e-07, "loss": 0.6825, "step": 14023 }, { "epoch": 0.9, "grad_norm": 1.1399180679262233, "learning_rate": 2.72208425203222e-07, "loss": 0.7004, "step": 14024 }, { "epoch": 0.9, "grad_norm": 1.4500131522068962, "learning_rate": 2.718711769676957e-07, "loss": 0.6079, "step": 14025 }, { "epoch": 0.9, "grad_norm": 1.5167435044462578, "learning_rate": 2.715341319372117e-07, "loss": 0.6871, "step": 14026 }, { "epoch": 0.9, "grad_norm": 1.6683987833172849, "learning_rate": 2.711972901262538e-07, "loss": 0.6826, "step": 14027 }, { "epoch": 0.9, "grad_norm": 1.6962652520447743, "learning_rate": 2.708606515493006e-07, "loss": 0.6261, "step": 14028 }, { "epoch": 0.9, "grad_norm": 1.3902263234224013, "learning_rate": 2.705242162208188e-07, "loss": 0.5654, "step": 14029 }, { "epoch": 0.9, "grad_norm": 1.5692825905823409, "learning_rate": 2.701879841552685e-07, "loss": 0.6234, "step": 14030 }, { "epoch": 0.9, "grad_norm": 1.0233813935712412, "learning_rate": 2.698519553670992e-07, "loss": 0.6385, "step": 14031 }, { "epoch": 0.9, "grad_norm": 1.0329271101277127, "learning_rate": 2.695161298707538e-07, "loss": 0.618, "step": 14032 }, { "epoch": 0.9, "grad_norm": 1.6515962047423676, "learning_rate": 2.6918050768066527e-07, "loss": 0.6655, "step": 14033 }, { "epoch": 0.9, "grad_norm": 1.084402993574018, "learning_rate": 2.6884508881125814e-07, "loss": 0.6322, "step": 14034 }, { "epoch": 0.9, "grad_norm": 2.5148724661118256, "learning_rate": 2.68509873276947e-07, "loss": 0.6373, "step": 14035 }, { "epoch": 0.9, "grad_norm": 2.6943133769133314, "learning_rate": 2.681748610921392e-07, "loss": 0.7282, "step": 14036 }, { "epoch": 0.9, "grad_norm": 1.3765162289308661, "learning_rate": 2.678400522712332e-07, "loss": 0.8081, "step": 14037 }, { "epoch": 0.9, "grad_norm": 1.6581384869952742, "learning_rate": 2.675054468286181e-07, "loss": 0.6936, "step": 14038 }, { "epoch": 0.9, "grad_norm": 1.4811828627372576, "learning_rate": 2.6717104477867464e-07, "loss": 0.689, "step": 14039 }, { "epoch": 0.9, "grad_norm": 0.9898180506048667, "learning_rate": 2.66836846135774e-07, "loss": 0.5874, "step": 14040 }, { "epoch": 0.9, "grad_norm": 1.4385615825802833, "learning_rate": 2.665028509142803e-07, "loss": 0.6398, "step": 14041 }, { "epoch": 0.9, "grad_norm": 1.622667027747636, "learning_rate": 2.661690591285482e-07, "loss": 0.7179, "step": 14042 }, { "epoch": 0.9, "grad_norm": 1.3905148105493308, "learning_rate": 2.6583547079292224e-07, "loss": 0.6835, "step": 14043 }, { "epoch": 0.9, "grad_norm": 1.5331452657137405, "learning_rate": 2.6550208592173996e-07, "loss": 0.6679, "step": 14044 }, { "epoch": 0.9, "grad_norm": 1.533184876352356, "learning_rate": 2.651689045293293e-07, "loss": 0.6149, "step": 14045 }, { "epoch": 0.9, "grad_norm": 1.3938733641763414, "learning_rate": 2.648359266300105e-07, "loss": 0.6519, "step": 14046 }, { "epoch": 0.9, "grad_norm": 1.724346145310095, "learning_rate": 2.645031522380925e-07, "loss": 0.6785, "step": 14047 }, { "epoch": 0.9, "grad_norm": 1.5175570778957137, "learning_rate": 2.6417058136787965e-07, "loss": 0.6178, "step": 14048 }, { "epoch": 0.9, "grad_norm": 1.5779388520291995, "learning_rate": 2.638382140336626e-07, "loss": 0.6569, "step": 14049 }, { "epoch": 0.9, "grad_norm": 1.4278636469127193, "learning_rate": 2.6350605024972884e-07, "loss": 0.6493, "step": 14050 }, { "epoch": 0.9, "grad_norm": 1.0844979199117595, "learning_rate": 2.631740900303503e-07, "loss": 0.5998, "step": 14051 }, { "epoch": 0.9, "grad_norm": 1.428545300295617, "learning_rate": 2.6284233338979724e-07, "loss": 0.7957, "step": 14052 }, { "epoch": 0.9, "grad_norm": 1.0925571948444999, "learning_rate": 2.6251078034232605e-07, "loss": 0.7237, "step": 14053 }, { "epoch": 0.9, "grad_norm": 1.0576178089187955, "learning_rate": 2.621794309021863e-07, "loss": 0.6005, "step": 14054 }, { "epoch": 0.9, "grad_norm": 1.3903336417786558, "learning_rate": 2.6184828508362016e-07, "loss": 0.6357, "step": 14055 }, { "epoch": 0.9, "grad_norm": 1.1104408704759796, "learning_rate": 2.6151734290085715e-07, "loss": 0.6254, "step": 14056 }, { "epoch": 0.9, "grad_norm": 1.6084182969424723, "learning_rate": 2.6118660436812326e-07, "loss": 0.7334, "step": 14057 }, { "epoch": 0.9, "grad_norm": 1.5077448386401298, "learning_rate": 2.608560694996304e-07, "loss": 0.7296, "step": 14058 }, { "epoch": 0.9, "grad_norm": 1.626045720408535, "learning_rate": 2.605257383095855e-07, "loss": 0.6929, "step": 14059 }, { "epoch": 0.9, "grad_norm": 1.5232632850920558, "learning_rate": 2.6019561081218614e-07, "loss": 0.6569, "step": 14060 }, { "epoch": 0.9, "grad_norm": 1.6265925905317884, "learning_rate": 2.5986568702161817e-07, "loss": 0.6212, "step": 14061 }, { "epoch": 0.9, "grad_norm": 1.5246106831455242, "learning_rate": 2.5953596695206475e-07, "loss": 0.6214, "step": 14062 }, { "epoch": 0.9, "grad_norm": 1.5927883458816583, "learning_rate": 2.5920645061769225e-07, "loss": 0.7251, "step": 14063 }, { "epoch": 0.9, "grad_norm": 1.6196381195145706, "learning_rate": 2.5887713803266656e-07, "loss": 0.7457, "step": 14064 }, { "epoch": 0.9, "grad_norm": 1.617051668088377, "learning_rate": 2.585480292111375e-07, "loss": 0.6817, "step": 14065 }, { "epoch": 0.9, "grad_norm": 1.6962795896642802, "learning_rate": 2.5821912416725157e-07, "loss": 0.5746, "step": 14066 }, { "epoch": 0.9, "grad_norm": 1.482852989253273, "learning_rate": 2.578904229151441e-07, "loss": 0.6018, "step": 14067 }, { "epoch": 0.9, "grad_norm": 1.4687891436263318, "learning_rate": 2.5756192546894156e-07, "loss": 0.6064, "step": 14068 }, { "epoch": 0.9, "grad_norm": 1.9938364276391407, "learning_rate": 2.5723363184276207e-07, "loss": 0.6173, "step": 14069 }, { "epoch": 0.9, "grad_norm": 1.7670030474494884, "learning_rate": 2.5690554205071495e-07, "loss": 0.6587, "step": 14070 }, { "epoch": 0.9, "grad_norm": 1.566181336668517, "learning_rate": 2.5657765610690223e-07, "loss": 0.707, "step": 14071 }, { "epoch": 0.9, "grad_norm": 1.487973440309875, "learning_rate": 2.562499740254126e-07, "loss": 0.5754, "step": 14072 }, { "epoch": 0.9, "grad_norm": 1.6081699690147755, "learning_rate": 2.559224958203321e-07, "loss": 0.6844, "step": 14073 }, { "epoch": 0.9, "grad_norm": 1.5600637006668139, "learning_rate": 2.555952215057345e-07, "loss": 0.7051, "step": 14074 }, { "epoch": 0.9, "grad_norm": 1.0439923125851815, "learning_rate": 2.552681510956845e-07, "loss": 0.5436, "step": 14075 }, { "epoch": 0.9, "grad_norm": 1.6451599887416528, "learning_rate": 2.549412846042393e-07, "loss": 0.7102, "step": 14076 }, { "epoch": 0.9, "grad_norm": 1.3206682071255658, "learning_rate": 2.546146220454471e-07, "loss": 0.6649, "step": 14077 }, { "epoch": 0.9, "grad_norm": 1.556174928565538, "learning_rate": 2.542881634333472e-07, "loss": 0.6124, "step": 14078 }, { "epoch": 0.9, "grad_norm": 1.511166501696963, "learning_rate": 2.539619087819689e-07, "loss": 0.6839, "step": 14079 }, { "epoch": 0.9, "grad_norm": 1.5872489804193757, "learning_rate": 2.5363585810533606e-07, "loss": 0.6503, "step": 14080 }, { "epoch": 0.9, "grad_norm": 1.6064879607427274, "learning_rate": 2.533100114174597e-07, "loss": 0.729, "step": 14081 }, { "epoch": 0.9, "grad_norm": 1.5813750305884757, "learning_rate": 2.529843687323452e-07, "loss": 0.7529, "step": 14082 }, { "epoch": 0.9, "grad_norm": 2.4077350407976046, "learning_rate": 2.526589300639881e-07, "loss": 0.6342, "step": 14083 }, { "epoch": 0.9, "grad_norm": 1.618890777110047, "learning_rate": 2.523336954263744e-07, "loss": 0.6459, "step": 14084 }, { "epoch": 0.9, "grad_norm": 1.7972046766977625, "learning_rate": 2.520086648334824e-07, "loss": 0.7454, "step": 14085 }, { "epoch": 0.9, "grad_norm": 1.10252513767916, "learning_rate": 2.5168383829928087e-07, "loss": 0.5193, "step": 14086 }, { "epoch": 0.9, "grad_norm": 1.614937714657129, "learning_rate": 2.5135921583773036e-07, "loss": 0.6202, "step": 14087 }, { "epoch": 0.9, "grad_norm": 1.7123865086618268, "learning_rate": 2.5103479746278193e-07, "loss": 0.6912, "step": 14088 }, { "epoch": 0.9, "grad_norm": 1.5481153502809248, "learning_rate": 2.507105831883794e-07, "loss": 0.6477, "step": 14089 }, { "epoch": 0.9, "grad_norm": 1.501838345002581, "learning_rate": 2.503865730284566e-07, "loss": 0.5944, "step": 14090 }, { "epoch": 0.9, "grad_norm": 1.9335356515508906, "learning_rate": 2.5006276699693854e-07, "loss": 0.6489, "step": 14091 }, { "epoch": 0.9, "grad_norm": 1.0455354853729613, "learning_rate": 2.497391651077419e-07, "loss": 0.5639, "step": 14092 }, { "epoch": 0.9, "grad_norm": 1.4641199405306855, "learning_rate": 2.4941576737477435e-07, "loss": 0.6014, "step": 14093 }, { "epoch": 0.9, "grad_norm": 1.5610607600696476, "learning_rate": 2.490925738119343e-07, "loss": 0.6684, "step": 14094 }, { "epoch": 0.9, "grad_norm": 1.703700832595782, "learning_rate": 2.487695844331128e-07, "loss": 0.5782, "step": 14095 }, { "epoch": 0.9, "grad_norm": 1.6379299442338522, "learning_rate": 2.4844679925218994e-07, "loss": 0.6015, "step": 14096 }, { "epoch": 0.9, "grad_norm": 1.4793901509632317, "learning_rate": 2.481242182830401e-07, "loss": 0.6947, "step": 14097 }, { "epoch": 0.9, "grad_norm": 1.7481616362213241, "learning_rate": 2.4780184153952615e-07, "loss": 0.6691, "step": 14098 }, { "epoch": 0.9, "grad_norm": 1.1044931521149028, "learning_rate": 2.4747966903550355e-07, "loss": 0.6739, "step": 14099 }, { "epoch": 0.9, "grad_norm": 1.5677592365606396, "learning_rate": 2.47157700784818e-07, "loss": 0.6715, "step": 14100 }, { "epoch": 0.9, "grad_norm": 1.5995350039958773, "learning_rate": 2.4683593680130734e-07, "loss": 0.6678, "step": 14101 }, { "epoch": 0.9, "grad_norm": 1.5547432953301683, "learning_rate": 2.46514377098801e-07, "loss": 0.6915, "step": 14102 }, { "epoch": 0.9, "grad_norm": 1.187415315811341, "learning_rate": 2.461930216911168e-07, "loss": 0.6965, "step": 14103 }, { "epoch": 0.9, "grad_norm": 1.3078665273868677, "learning_rate": 2.458718705920693e-07, "loss": 0.7022, "step": 14104 }, { "epoch": 0.9, "grad_norm": 1.682318213579287, "learning_rate": 2.455509238154574e-07, "loss": 0.7836, "step": 14105 }, { "epoch": 0.9, "grad_norm": 1.7363940039566277, "learning_rate": 2.4523018137507736e-07, "loss": 0.6146, "step": 14106 }, { "epoch": 0.9, "grad_norm": 1.531075385367786, "learning_rate": 2.4490964328471257e-07, "loss": 0.544, "step": 14107 }, { "epoch": 0.9, "grad_norm": 1.4328673793930256, "learning_rate": 2.445893095581392e-07, "loss": 0.6396, "step": 14108 }, { "epoch": 0.9, "grad_norm": 1.5917998803408107, "learning_rate": 2.442691802091257e-07, "loss": 0.6219, "step": 14109 }, { "epoch": 0.9, "grad_norm": 1.4340368681240168, "learning_rate": 2.4394925525142834e-07, "loss": 0.6747, "step": 14110 }, { "epoch": 0.9, "grad_norm": 1.4967047328348904, "learning_rate": 2.4362953469879934e-07, "loss": 0.6077, "step": 14111 }, { "epoch": 0.9, "grad_norm": 1.7255759833609456, "learning_rate": 2.4331001856497784e-07, "loss": 0.6435, "step": 14112 }, { "epoch": 0.9, "grad_norm": 1.5435673597729789, "learning_rate": 2.429907068636972e-07, "loss": 0.6918, "step": 14113 }, { "epoch": 0.9, "grad_norm": 1.7706744597090256, "learning_rate": 2.4267159960867927e-07, "loss": 0.5674, "step": 14114 }, { "epoch": 0.9, "grad_norm": 1.5969902515615049, "learning_rate": 2.423526968136397e-07, "loss": 0.6796, "step": 14115 }, { "epoch": 0.9, "grad_norm": 1.6031035684892323, "learning_rate": 2.420339984922843e-07, "loss": 0.6364, "step": 14116 }, { "epoch": 0.9, "grad_norm": 1.6980663322769862, "learning_rate": 2.4171550465830974e-07, "loss": 0.6413, "step": 14117 }, { "epoch": 0.9, "grad_norm": 1.7475443011246303, "learning_rate": 2.4139721532540405e-07, "loss": 0.7045, "step": 14118 }, { "epoch": 0.9, "grad_norm": 1.0754645505474374, "learning_rate": 2.4107913050724627e-07, "loss": 0.6521, "step": 14119 }, { "epoch": 0.9, "grad_norm": 1.0360189501371815, "learning_rate": 2.407612502175094e-07, "loss": 0.6563, "step": 14120 }, { "epoch": 0.9, "grad_norm": 1.63963229516731, "learning_rate": 2.4044357446985134e-07, "loss": 0.6906, "step": 14121 }, { "epoch": 0.9, "grad_norm": 1.0438641201980694, "learning_rate": 2.4012610327792895e-07, "loss": 0.6236, "step": 14122 }, { "epoch": 0.9, "grad_norm": 1.6291922859489685, "learning_rate": 2.398088366553836e-07, "loss": 0.6226, "step": 14123 }, { "epoch": 0.9, "grad_norm": 1.7099376800420998, "learning_rate": 2.3949177461585263e-07, "loss": 0.5869, "step": 14124 }, { "epoch": 0.9, "grad_norm": 1.7291310777565827, "learning_rate": 2.3917491717296184e-07, "loss": 0.7072, "step": 14125 }, { "epoch": 0.9, "grad_norm": 1.561430847378981, "learning_rate": 2.388582643403281e-07, "loss": 0.6453, "step": 14126 }, { "epoch": 0.9, "grad_norm": 1.1411120994624324, "learning_rate": 2.385418161315639e-07, "loss": 0.722, "step": 14127 }, { "epoch": 0.9, "grad_norm": 1.5572713321467653, "learning_rate": 2.38225572560265e-07, "loss": 0.6795, "step": 14128 }, { "epoch": 0.9, "grad_norm": 1.526669410978097, "learning_rate": 2.3790953364002722e-07, "loss": 0.6079, "step": 14129 }, { "epoch": 0.9, "grad_norm": 1.7725388146138983, "learning_rate": 2.375936993844291e-07, "loss": 0.6837, "step": 14130 }, { "epoch": 0.9, "grad_norm": 1.6872661825297304, "learning_rate": 2.372780698070476e-07, "loss": 0.5399, "step": 14131 }, { "epoch": 0.9, "grad_norm": 1.6510804362120466, "learning_rate": 2.3696264492144684e-07, "loss": 0.6946, "step": 14132 }, { "epoch": 0.9, "grad_norm": 0.9625271937776435, "learning_rate": 2.3664742474118317e-07, "loss": 0.6588, "step": 14133 }, { "epoch": 0.9, "grad_norm": 1.5392807553979415, "learning_rate": 2.363324092798036e-07, "loss": 0.6588, "step": 14134 }, { "epoch": 0.9, "grad_norm": 1.3266998985625906, "learning_rate": 2.3601759855084672e-07, "loss": 0.643, "step": 14135 }, { "epoch": 0.9, "grad_norm": 1.5509111736292196, "learning_rate": 2.3570299256784446e-07, "loss": 0.6613, "step": 14136 }, { "epoch": 0.9, "grad_norm": 1.6271312092100254, "learning_rate": 2.3538859134431547e-07, "loss": 0.5561, "step": 14137 }, { "epoch": 0.9, "grad_norm": 1.7061630885773515, "learning_rate": 2.350743948937728e-07, "loss": 0.6569, "step": 14138 }, { "epoch": 0.9, "grad_norm": 1.2249884631242678, "learning_rate": 2.347604032297207e-07, "loss": 0.7048, "step": 14139 }, { "epoch": 0.91, "grad_norm": 1.8465080279136188, "learning_rate": 2.3444661636565337e-07, "loss": 0.7298, "step": 14140 }, { "epoch": 0.91, "grad_norm": 1.6000899943402436, "learning_rate": 2.3413303431505606e-07, "loss": 0.5774, "step": 14141 }, { "epoch": 0.91, "grad_norm": 1.6311461975138262, "learning_rate": 2.3381965709140696e-07, "loss": 0.8358, "step": 14142 }, { "epoch": 0.91, "grad_norm": 1.1294468481042679, "learning_rate": 2.3350648470817416e-07, "loss": 0.6657, "step": 14143 }, { "epoch": 0.91, "grad_norm": 1.7269470714602877, "learning_rate": 2.331935171788158e-07, "loss": 0.6727, "step": 14144 }, { "epoch": 0.91, "grad_norm": 1.1892547818450427, "learning_rate": 2.3288075451678381e-07, "loss": 0.7245, "step": 14145 }, { "epoch": 0.91, "grad_norm": 1.5574182077985004, "learning_rate": 2.325681967355209e-07, "loss": 0.6236, "step": 14146 }, { "epoch": 0.91, "grad_norm": 1.680574921873633, "learning_rate": 2.3225584384845845e-07, "loss": 0.7591, "step": 14147 }, { "epoch": 0.91, "grad_norm": 1.5590766343942535, "learning_rate": 2.3194369586902132e-07, "loss": 0.6338, "step": 14148 }, { "epoch": 0.91, "grad_norm": 1.515411357417583, "learning_rate": 2.3163175281062545e-07, "loss": 0.6293, "step": 14149 }, { "epoch": 0.91, "grad_norm": 2.0934612072265706, "learning_rate": 2.313200146866773e-07, "loss": 0.6333, "step": 14150 }, { "epoch": 0.91, "grad_norm": 1.6647719952084388, "learning_rate": 2.31008481510574e-07, "loss": 0.6411, "step": 14151 }, { "epoch": 0.91, "grad_norm": 2.0206455483542047, "learning_rate": 2.3069715329570475e-07, "loss": 0.6099, "step": 14152 }, { "epoch": 0.91, "grad_norm": 1.046003540796839, "learning_rate": 2.3038603005545113e-07, "loss": 0.6356, "step": 14153 }, { "epoch": 0.91, "grad_norm": 1.1841103433981854, "learning_rate": 2.3007511180318298e-07, "loss": 0.6412, "step": 14154 }, { "epoch": 0.91, "grad_norm": 1.3981380162552917, "learning_rate": 2.2976439855226406e-07, "loss": 0.6507, "step": 14155 }, { "epoch": 0.91, "grad_norm": 1.5001017977231876, "learning_rate": 2.294538903160476e-07, "loss": 0.6658, "step": 14156 }, { "epoch": 0.91, "grad_norm": 2.082455445799924, "learning_rate": 2.2914358710787842e-07, "loss": 0.6243, "step": 14157 }, { "epoch": 0.91, "grad_norm": 1.494530565862196, "learning_rate": 2.2883348894109259e-07, "loss": 0.637, "step": 14158 }, { "epoch": 0.91, "grad_norm": 1.9104818338050058, "learning_rate": 2.2852359582901828e-07, "loss": 0.694, "step": 14159 }, { "epoch": 0.91, "grad_norm": 1.4686847610653446, "learning_rate": 2.2821390778497377e-07, "loss": 0.687, "step": 14160 }, { "epoch": 0.91, "grad_norm": 1.6252996648519291, "learning_rate": 2.2790442482226727e-07, "loss": 0.6111, "step": 14161 }, { "epoch": 0.91, "grad_norm": 1.422619018039137, "learning_rate": 2.2759514695420204e-07, "loss": 0.6441, "step": 14162 }, { "epoch": 0.91, "grad_norm": 2.603558688349581, "learning_rate": 2.2728607419406967e-07, "loss": 0.7128, "step": 14163 }, { "epoch": 0.91, "grad_norm": 1.8864159443861268, "learning_rate": 2.2697720655515232e-07, "loss": 0.6303, "step": 14164 }, { "epoch": 0.91, "grad_norm": 0.9871322419168257, "learning_rate": 2.2666854405072546e-07, "loss": 0.6068, "step": 14165 }, { "epoch": 0.91, "grad_norm": 1.8422979299548095, "learning_rate": 2.2636008669405408e-07, "loss": 0.5728, "step": 14166 }, { "epoch": 0.91, "grad_norm": 1.516618294802858, "learning_rate": 2.2605183449839585e-07, "loss": 0.6175, "step": 14167 }, { "epoch": 0.91, "grad_norm": 1.7123941055526242, "learning_rate": 2.2574378747699743e-07, "loss": 0.6145, "step": 14168 }, { "epoch": 0.91, "grad_norm": 1.5693967391182069, "learning_rate": 2.2543594564309989e-07, "loss": 0.6954, "step": 14169 }, { "epoch": 0.91, "grad_norm": 1.9331484281749178, "learning_rate": 2.2512830900993155e-07, "loss": 0.6784, "step": 14170 }, { "epoch": 0.91, "grad_norm": 1.5437689942036368, "learning_rate": 2.2482087759071625e-07, "loss": 0.6727, "step": 14171 }, { "epoch": 0.91, "grad_norm": 1.5049740602786628, "learning_rate": 2.245136513986651e-07, "loss": 0.6619, "step": 14172 }, { "epoch": 0.91, "grad_norm": 1.5628514285228448, "learning_rate": 2.2420663044698254e-07, "loss": 0.5916, "step": 14173 }, { "epoch": 0.91, "grad_norm": 1.141008335641196, "learning_rate": 2.2389981474886413e-07, "loss": 0.6016, "step": 14174 }, { "epoch": 0.91, "grad_norm": 1.6446471992390046, "learning_rate": 2.2359320431749432e-07, "loss": 0.6986, "step": 14175 }, { "epoch": 0.91, "grad_norm": 1.5502262218134102, "learning_rate": 2.2328679916605368e-07, "loss": 0.7087, "step": 14176 }, { "epoch": 0.91, "grad_norm": 1.7206891845792376, "learning_rate": 2.2298059930770833e-07, "loss": 0.6104, "step": 14177 }, { "epoch": 0.91, "grad_norm": 1.5414274988256007, "learning_rate": 2.2267460475561942e-07, "loss": 0.6886, "step": 14178 }, { "epoch": 0.91, "grad_norm": 1.3480263415101617, "learning_rate": 2.2236881552293642e-07, "loss": 0.7918, "step": 14179 }, { "epoch": 0.91, "grad_norm": 1.4729537761891167, "learning_rate": 2.220632316228033e-07, "loss": 0.6113, "step": 14180 }, { "epoch": 0.91, "grad_norm": 1.5576954360963486, "learning_rate": 2.2175785306835285e-07, "loss": 0.7373, "step": 14181 }, { "epoch": 0.91, "grad_norm": 1.5029336473814372, "learning_rate": 2.2145267987270847e-07, "loss": 0.6449, "step": 14182 }, { "epoch": 0.91, "grad_norm": 1.605969926694281, "learning_rate": 2.21147712048988e-07, "loss": 0.6182, "step": 14183 }, { "epoch": 0.91, "grad_norm": 1.521061008055761, "learning_rate": 2.2084294961029596e-07, "loss": 0.6128, "step": 14184 }, { "epoch": 0.91, "grad_norm": 1.4702872241532092, "learning_rate": 2.2053839256973297e-07, "loss": 0.6138, "step": 14185 }, { "epoch": 0.91, "grad_norm": 1.423375623518538, "learning_rate": 2.2023404094038524e-07, "loss": 0.6972, "step": 14186 }, { "epoch": 0.91, "grad_norm": 1.329478840315756, "learning_rate": 2.1992989473533566e-07, "loss": 0.5852, "step": 14187 }, { "epoch": 0.91, "grad_norm": 1.5348219142819441, "learning_rate": 2.1962595396765486e-07, "loss": 0.689, "step": 14188 }, { "epoch": 0.91, "grad_norm": 1.5431343237652217, "learning_rate": 2.1932221865040572e-07, "loss": 0.6205, "step": 14189 }, { "epoch": 0.91, "grad_norm": 1.5453569057583774, "learning_rate": 2.1901868879664168e-07, "loss": 0.6662, "step": 14190 }, { "epoch": 0.91, "grad_norm": 1.5332128725413285, "learning_rate": 2.187153644194079e-07, "loss": 0.6204, "step": 14191 }, { "epoch": 0.91, "grad_norm": 1.782109102552337, "learning_rate": 2.1841224553174222e-07, "loss": 0.6019, "step": 14192 }, { "epoch": 0.91, "grad_norm": 1.4217574813668288, "learning_rate": 2.1810933214666928e-07, "loss": 0.6858, "step": 14193 }, { "epoch": 0.91, "grad_norm": 1.5160288626697447, "learning_rate": 2.178066242772092e-07, "loss": 0.7051, "step": 14194 }, { "epoch": 0.91, "grad_norm": 1.6165946854887778, "learning_rate": 2.1750412193637216e-07, "loss": 0.6259, "step": 14195 }, { "epoch": 0.91, "grad_norm": 2.499386208150294, "learning_rate": 2.1720182513715882e-07, "loss": 0.6789, "step": 14196 }, { "epoch": 0.91, "grad_norm": 1.5226817556041072, "learning_rate": 2.1689973389256047e-07, "loss": 0.622, "step": 14197 }, { "epoch": 0.91, "grad_norm": 2.2829963749474578, "learning_rate": 2.1659784821556117e-07, "loss": 0.706, "step": 14198 }, { "epoch": 0.91, "grad_norm": 1.3871117188791735, "learning_rate": 2.1629616811913502e-07, "loss": 0.6801, "step": 14199 }, { "epoch": 0.91, "grad_norm": 1.6392850767586784, "learning_rate": 2.1599469361624714e-07, "loss": 0.6206, "step": 14200 }, { "epoch": 0.91, "grad_norm": 1.609449486291697, "learning_rate": 2.1569342471985556e-07, "loss": 0.6701, "step": 14201 }, { "epoch": 0.91, "grad_norm": 1.54813155796446, "learning_rate": 2.1539236144290653e-07, "loss": 0.6184, "step": 14202 }, { "epoch": 0.91, "grad_norm": 1.4728969689816604, "learning_rate": 2.150915037983403e-07, "loss": 0.5845, "step": 14203 }, { "epoch": 0.91, "grad_norm": 1.6091849429579457, "learning_rate": 2.147908517990871e-07, "loss": 0.6514, "step": 14204 }, { "epoch": 0.91, "grad_norm": 1.5911134967442346, "learning_rate": 2.1449040545806766e-07, "loss": 0.7315, "step": 14205 }, { "epoch": 0.91, "grad_norm": 1.590112290447378, "learning_rate": 2.14190164788195e-07, "loss": 0.7623, "step": 14206 }, { "epoch": 0.91, "grad_norm": 1.4933371115223981, "learning_rate": 2.1389012980237267e-07, "loss": 0.6393, "step": 14207 }, { "epoch": 0.91, "grad_norm": 1.5616887196707938, "learning_rate": 2.1359030051349538e-07, "loss": 0.6587, "step": 14208 }, { "epoch": 0.91, "grad_norm": 1.866715036236745, "learning_rate": 2.1329067693444893e-07, "loss": 0.6614, "step": 14209 }, { "epoch": 0.91, "grad_norm": 1.5011573639004394, "learning_rate": 2.1299125907811136e-07, "loss": 0.7245, "step": 14210 }, { "epoch": 0.91, "grad_norm": 1.6056028436865741, "learning_rate": 2.126920469573507e-07, "loss": 0.7001, "step": 14211 }, { "epoch": 0.91, "grad_norm": 1.5331454554828163, "learning_rate": 2.1239304058502663e-07, "loss": 0.649, "step": 14212 }, { "epoch": 0.91, "grad_norm": 1.6272537966344258, "learning_rate": 2.1209423997398893e-07, "loss": 0.7255, "step": 14213 }, { "epoch": 0.91, "grad_norm": 1.6641791860137938, "learning_rate": 2.1179564513708062e-07, "loss": 0.6573, "step": 14214 }, { "epoch": 0.91, "grad_norm": 2.100685608617515, "learning_rate": 2.1149725608713368e-07, "loss": 0.7271, "step": 14215 }, { "epoch": 0.91, "grad_norm": 1.6421760714397982, "learning_rate": 2.1119907283697282e-07, "loss": 0.7052, "step": 14216 }, { "epoch": 0.91, "grad_norm": 1.766321474458579, "learning_rate": 2.109010953994123e-07, "loss": 0.6495, "step": 14217 }, { "epoch": 0.91, "grad_norm": 1.5619971211422798, "learning_rate": 2.106033237872601e-07, "loss": 0.5771, "step": 14218 }, { "epoch": 0.91, "grad_norm": 1.1463067142997712, "learning_rate": 2.1030575801331332e-07, "loss": 0.6886, "step": 14219 }, { "epoch": 0.91, "grad_norm": 1.6308584267503965, "learning_rate": 2.1000839809036055e-07, "loss": 0.6456, "step": 14220 }, { "epoch": 0.91, "grad_norm": 1.717625107086087, "learning_rate": 2.097112440311816e-07, "loss": 0.7206, "step": 14221 }, { "epoch": 0.91, "grad_norm": 1.1935496018811433, "learning_rate": 2.0941429584854788e-07, "loss": 0.6362, "step": 14222 }, { "epoch": 0.91, "grad_norm": 1.6080828002718281, "learning_rate": 2.0911755355522089e-07, "loss": 0.7101, "step": 14223 }, { "epoch": 0.91, "grad_norm": 1.536998626155611, "learning_rate": 2.0882101716395376e-07, "loss": 0.6199, "step": 14224 }, { "epoch": 0.91, "grad_norm": 1.6483031934052432, "learning_rate": 2.0852468668749294e-07, "loss": 0.706, "step": 14225 }, { "epoch": 0.91, "grad_norm": 1.095602532518373, "learning_rate": 2.0822856213857158e-07, "loss": 0.6888, "step": 14226 }, { "epoch": 0.91, "grad_norm": 2.2794232102856546, "learning_rate": 2.0793264352991894e-07, "loss": 0.5847, "step": 14227 }, { "epoch": 0.91, "grad_norm": 0.921760833138497, "learning_rate": 2.0763693087425095e-07, "loss": 0.6459, "step": 14228 }, { "epoch": 0.91, "grad_norm": 1.5729198817878047, "learning_rate": 2.0734142418427806e-07, "loss": 0.6589, "step": 14229 }, { "epoch": 0.91, "grad_norm": 1.7961852037550474, "learning_rate": 2.0704612347269948e-07, "loss": 0.8013, "step": 14230 }, { "epoch": 0.91, "grad_norm": 1.5743736634692513, "learning_rate": 2.067510287522073e-07, "loss": 0.6517, "step": 14231 }, { "epoch": 0.91, "grad_norm": 1.7221309323383827, "learning_rate": 2.064561400354842e-07, "loss": 0.5591, "step": 14232 }, { "epoch": 0.91, "grad_norm": 1.384364666469038, "learning_rate": 2.0616145733520276e-07, "loss": 0.5223, "step": 14233 }, { "epoch": 0.91, "grad_norm": 1.6953301719983316, "learning_rate": 2.058669806640301e-07, "loss": 0.617, "step": 14234 }, { "epoch": 0.91, "grad_norm": 1.4438322017992529, "learning_rate": 2.0557271003461942e-07, "loss": 0.5971, "step": 14235 }, { "epoch": 0.91, "grad_norm": 2.543800120048913, "learning_rate": 2.052786454596195e-07, "loss": 0.5845, "step": 14236 }, { "epoch": 0.91, "grad_norm": 1.4308744620355562, "learning_rate": 2.0498478695166857e-07, "loss": 0.5979, "step": 14237 }, { "epoch": 0.91, "grad_norm": 1.6702216681296094, "learning_rate": 2.046911345233954e-07, "loss": 0.6224, "step": 14238 }, { "epoch": 0.91, "grad_norm": 2.0176196260599637, "learning_rate": 2.0439768818742156e-07, "loss": 0.6631, "step": 14239 }, { "epoch": 0.91, "grad_norm": 1.7245305769288852, "learning_rate": 2.0410444795635697e-07, "loss": 0.7118, "step": 14240 }, { "epoch": 0.91, "grad_norm": 1.560959738780906, "learning_rate": 2.0381141384280711e-07, "loss": 0.7043, "step": 14241 }, { "epoch": 0.91, "grad_norm": 1.6667982176779192, "learning_rate": 2.0351858585936356e-07, "loss": 0.6862, "step": 14242 }, { "epoch": 0.91, "grad_norm": 1.559167176924218, "learning_rate": 2.0322596401861294e-07, "loss": 0.6311, "step": 14243 }, { "epoch": 0.91, "grad_norm": 1.1230271109600027, "learning_rate": 2.0293354833313018e-07, "loss": 0.638, "step": 14244 }, { "epoch": 0.91, "grad_norm": 1.375432740240531, "learning_rate": 2.026413388154841e-07, "loss": 0.6589, "step": 14245 }, { "epoch": 0.91, "grad_norm": 1.5749466929783893, "learning_rate": 2.0234933547823242e-07, "loss": 0.6386, "step": 14246 }, { "epoch": 0.91, "grad_norm": 1.7279263173051598, "learning_rate": 2.020575383339246e-07, "loss": 0.6327, "step": 14247 }, { "epoch": 0.91, "grad_norm": 1.6042045864546866, "learning_rate": 2.0176594739510336e-07, "loss": 0.8147, "step": 14248 }, { "epoch": 0.91, "grad_norm": 1.3075992465873443, "learning_rate": 2.0147456267429754e-07, "loss": 0.7055, "step": 14249 }, { "epoch": 0.91, "grad_norm": 1.6234922873607602, "learning_rate": 2.0118338418403382e-07, "loss": 0.6329, "step": 14250 }, { "epoch": 0.91, "grad_norm": 1.4588752808091516, "learning_rate": 2.0089241193682273e-07, "loss": 0.5812, "step": 14251 }, { "epoch": 0.91, "grad_norm": 1.4466281745047458, "learning_rate": 2.0060164594517206e-07, "loss": 0.6053, "step": 14252 }, { "epoch": 0.91, "grad_norm": 1.7816148420907443, "learning_rate": 2.003110862215779e-07, "loss": 0.7019, "step": 14253 }, { "epoch": 0.91, "grad_norm": 1.4147218631217733, "learning_rate": 2.000207327785275e-07, "loss": 0.6684, "step": 14254 }, { "epoch": 0.91, "grad_norm": 1.550081104290106, "learning_rate": 1.9973058562850033e-07, "loss": 0.723, "step": 14255 }, { "epoch": 0.91, "grad_norm": 1.4298911210053769, "learning_rate": 1.994406447839653e-07, "loss": 0.6428, "step": 14256 }, { "epoch": 0.91, "grad_norm": 1.429189442142065, "learning_rate": 1.9915091025738464e-07, "loss": 0.5764, "step": 14257 }, { "epoch": 0.91, "grad_norm": 1.8690131315366272, "learning_rate": 1.9886138206120896e-07, "loss": 0.6233, "step": 14258 }, { "epoch": 0.91, "grad_norm": 1.2444125890371973, "learning_rate": 1.985720602078828e-07, "loss": 0.7311, "step": 14259 }, { "epoch": 0.91, "grad_norm": 1.526654643622502, "learning_rate": 1.9828294470984054e-07, "loss": 0.6516, "step": 14260 }, { "epoch": 0.91, "grad_norm": 1.8924489161930573, "learning_rate": 1.9799403557950793e-07, "loss": 0.6619, "step": 14261 }, { "epoch": 0.91, "grad_norm": 1.503940812417566, "learning_rate": 1.977053328293005e-07, "loss": 0.6749, "step": 14262 }, { "epoch": 0.91, "grad_norm": 1.3943996718172194, "learning_rate": 1.9741683647162724e-07, "loss": 0.6845, "step": 14263 }, { "epoch": 0.91, "grad_norm": 1.510732803539874, "learning_rate": 1.9712854651888713e-07, "loss": 0.6089, "step": 14264 }, { "epoch": 0.91, "grad_norm": 1.1566074969661297, "learning_rate": 1.9684046298346858e-07, "loss": 0.6596, "step": 14265 }, { "epoch": 0.91, "grad_norm": 1.356626274640149, "learning_rate": 1.9655258587775505e-07, "loss": 0.699, "step": 14266 }, { "epoch": 0.91, "grad_norm": 1.6324029647618024, "learning_rate": 1.9626491521411773e-07, "loss": 0.662, "step": 14267 }, { "epoch": 0.91, "grad_norm": 1.7032866076043292, "learning_rate": 1.959774510049206e-07, "loss": 0.6063, "step": 14268 }, { "epoch": 0.91, "grad_norm": 1.154776488645027, "learning_rate": 1.956901932625177e-07, "loss": 0.74, "step": 14269 }, { "epoch": 0.91, "grad_norm": 1.561808548925373, "learning_rate": 1.9540314199925525e-07, "loss": 0.6758, "step": 14270 }, { "epoch": 0.91, "grad_norm": 1.7292671204720282, "learning_rate": 1.9511629722747004e-07, "loss": 0.6837, "step": 14271 }, { "epoch": 0.91, "grad_norm": 1.5613498614508348, "learning_rate": 1.9482965895948947e-07, "loss": 0.7018, "step": 14272 }, { "epoch": 0.91, "grad_norm": 1.3744453261560448, "learning_rate": 1.9454322720763364e-07, "loss": 0.7133, "step": 14273 }, { "epoch": 0.91, "grad_norm": 1.5104491423141737, "learning_rate": 1.9425700198421104e-07, "loss": 0.6137, "step": 14274 }, { "epoch": 0.91, "grad_norm": 1.6918892671091492, "learning_rate": 1.939709833015252e-07, "loss": 0.745, "step": 14275 }, { "epoch": 0.91, "grad_norm": 1.4189466343932564, "learning_rate": 1.9368517117186737e-07, "loss": 0.6077, "step": 14276 }, { "epoch": 0.91, "grad_norm": 1.4861374349311176, "learning_rate": 1.9339956560752216e-07, "loss": 0.6069, "step": 14277 }, { "epoch": 0.91, "grad_norm": 1.676221381869194, "learning_rate": 1.9311416662076253e-07, "loss": 0.6787, "step": 14278 }, { "epoch": 0.91, "grad_norm": 1.5597869187475764, "learning_rate": 1.9282897422385593e-07, "loss": 0.7085, "step": 14279 }, { "epoch": 0.91, "grad_norm": 1.5502669270642695, "learning_rate": 1.9254398842905918e-07, "loss": 0.6667, "step": 14280 }, { "epoch": 0.91, "grad_norm": 1.4300447900713906, "learning_rate": 1.9225920924861917e-07, "loss": 0.6836, "step": 14281 }, { "epoch": 0.91, "grad_norm": 1.492408638248816, "learning_rate": 1.9197463669477557e-07, "loss": 0.6314, "step": 14282 }, { "epoch": 0.91, "grad_norm": 1.5271229404984386, "learning_rate": 1.9169027077975965e-07, "loss": 0.6839, "step": 14283 }, { "epoch": 0.91, "grad_norm": 1.4867791515789308, "learning_rate": 1.9140611151579224e-07, "loss": 0.6278, "step": 14284 }, { "epoch": 0.91, "grad_norm": 1.6333287929226996, "learning_rate": 1.9112215891508635e-07, "loss": 0.6584, "step": 14285 }, { "epoch": 0.91, "grad_norm": 1.4805189503134657, "learning_rate": 1.908384129898444e-07, "loss": 0.6277, "step": 14286 }, { "epoch": 0.91, "grad_norm": 1.580792696646142, "learning_rate": 1.905548737522628e-07, "loss": 0.5911, "step": 14287 }, { "epoch": 0.91, "grad_norm": 2.3657046678798586, "learning_rate": 1.9027154121452618e-07, "loss": 0.5832, "step": 14288 }, { "epoch": 0.91, "grad_norm": 1.4833166473287487, "learning_rate": 1.899884153888115e-07, "loss": 0.6608, "step": 14289 }, { "epoch": 0.91, "grad_norm": 1.5373870228284423, "learning_rate": 1.8970549628728908e-07, "loss": 0.5615, "step": 14290 }, { "epoch": 0.91, "grad_norm": 1.573717680072815, "learning_rate": 1.8942278392211466e-07, "loss": 0.6509, "step": 14291 }, { "epoch": 0.91, "grad_norm": 1.6187018846373158, "learning_rate": 1.891402783054419e-07, "loss": 0.7074, "step": 14292 }, { "epoch": 0.91, "grad_norm": 1.465275704950953, "learning_rate": 1.8885797944941052e-07, "loss": 0.6776, "step": 14293 }, { "epoch": 0.91, "grad_norm": 1.842775837904042, "learning_rate": 1.8857588736615418e-07, "loss": 0.7041, "step": 14294 }, { "epoch": 0.91, "grad_norm": 1.947612303704929, "learning_rate": 1.8829400206779536e-07, "loss": 0.7295, "step": 14295 }, { "epoch": 0.92, "grad_norm": 1.6627484621097597, "learning_rate": 1.8801232356644938e-07, "loss": 0.6863, "step": 14296 }, { "epoch": 0.92, "grad_norm": 1.4125002013886396, "learning_rate": 1.8773085187422325e-07, "loss": 0.632, "step": 14297 }, { "epoch": 0.92, "grad_norm": 1.5230059460266578, "learning_rate": 1.8744958700321225e-07, "loss": 0.6286, "step": 14298 }, { "epoch": 0.92, "grad_norm": 1.5906908700110862, "learning_rate": 1.8716852896550618e-07, "loss": 0.6093, "step": 14299 }, { "epoch": 0.92, "grad_norm": 1.4804271176295445, "learning_rate": 1.8688767777318262e-07, "loss": 0.5241, "step": 14300 }, { "epoch": 0.92, "grad_norm": 1.3560508330948886, "learning_rate": 1.8660703343831354e-07, "loss": 0.7594, "step": 14301 }, { "epoch": 0.92, "grad_norm": 1.3811945990412813, "learning_rate": 1.863265959729599e-07, "loss": 0.6418, "step": 14302 }, { "epoch": 0.92, "grad_norm": 1.4516257121966523, "learning_rate": 1.8604636538917365e-07, "loss": 0.6321, "step": 14303 }, { "epoch": 0.92, "grad_norm": 1.5983362749208678, "learning_rate": 1.8576634169900022e-07, "loss": 0.5944, "step": 14304 }, { "epoch": 0.92, "grad_norm": 1.4177336723032128, "learning_rate": 1.8548652491447217e-07, "loss": 0.6445, "step": 14305 }, { "epoch": 0.92, "grad_norm": 1.5523055872640392, "learning_rate": 1.8520691504761769e-07, "loss": 0.6241, "step": 14306 }, { "epoch": 0.92, "grad_norm": 1.7593978999889575, "learning_rate": 1.8492751211045156e-07, "loss": 0.5955, "step": 14307 }, { "epoch": 0.92, "grad_norm": 1.8535328594732627, "learning_rate": 1.8464831611498367e-07, "loss": 0.6535, "step": 14308 }, { "epoch": 0.92, "grad_norm": 1.4387594218787627, "learning_rate": 1.8436932707321276e-07, "loss": 0.6189, "step": 14309 }, { "epoch": 0.92, "grad_norm": 1.5249704166874674, "learning_rate": 1.840905449971292e-07, "loss": 0.6095, "step": 14310 }, { "epoch": 0.92, "grad_norm": 1.0912585214171058, "learning_rate": 1.8381196989871453e-07, "loss": 0.6307, "step": 14311 }, { "epoch": 0.92, "grad_norm": 2.0230704855023367, "learning_rate": 1.835336017899403e-07, "loss": 0.6777, "step": 14312 }, { "epoch": 0.92, "grad_norm": 1.8756790031622261, "learning_rate": 1.8325544068277244e-07, "loss": 0.4821, "step": 14313 }, { "epoch": 0.92, "grad_norm": 3.6228651528574445, "learning_rate": 1.8297748658916314e-07, "loss": 0.6601, "step": 14314 }, { "epoch": 0.92, "grad_norm": 1.5779750377452968, "learning_rate": 1.8269973952106057e-07, "loss": 0.6481, "step": 14315 }, { "epoch": 0.92, "grad_norm": 1.4068768687559092, "learning_rate": 1.8242219949039962e-07, "loss": 0.636, "step": 14316 }, { "epoch": 0.92, "grad_norm": 1.5717194617047032, "learning_rate": 1.8214486650911022e-07, "loss": 0.6822, "step": 14317 }, { "epoch": 0.92, "grad_norm": 1.5073556906359493, "learning_rate": 1.8186774058911005e-07, "loss": 0.6323, "step": 14318 }, { "epoch": 0.92, "grad_norm": 1.6769211460292108, "learning_rate": 1.8159082174231012e-07, "loss": 0.7035, "step": 14319 }, { "epoch": 0.92, "grad_norm": 1.4784442330318723, "learning_rate": 1.8131410998061261e-07, "loss": 0.5994, "step": 14320 }, { "epoch": 0.92, "grad_norm": 1.4537226488945596, "learning_rate": 1.8103760531590851e-07, "loss": 0.6006, "step": 14321 }, { "epoch": 0.92, "grad_norm": 1.4287038518288981, "learning_rate": 1.8076130776008283e-07, "loss": 0.6502, "step": 14322 }, { "epoch": 0.92, "grad_norm": 1.180741168562449, "learning_rate": 1.8048521732500878e-07, "loss": 0.5578, "step": 14323 }, { "epoch": 0.92, "grad_norm": 1.521292118347659, "learning_rate": 1.8020933402255304e-07, "loss": 0.688, "step": 14324 }, { "epoch": 0.92, "grad_norm": 1.6546236207723841, "learning_rate": 1.7993365786457217e-07, "loss": 0.605, "step": 14325 }, { "epoch": 0.92, "grad_norm": 1.5027162488625356, "learning_rate": 1.7965818886291508e-07, "loss": 0.5829, "step": 14326 }, { "epoch": 0.92, "grad_norm": 1.071684723534241, "learning_rate": 1.793829270294195e-07, "loss": 0.6397, "step": 14327 }, { "epoch": 0.92, "grad_norm": 1.4442733783913362, "learning_rate": 1.7910787237591598e-07, "loss": 0.6884, "step": 14328 }, { "epoch": 0.92, "grad_norm": 1.510363846779395, "learning_rate": 1.7883302491422673e-07, "loss": 0.6712, "step": 14329 }, { "epoch": 0.92, "grad_norm": 1.832318146779947, "learning_rate": 1.7855838465616283e-07, "loss": 0.6812, "step": 14330 }, { "epoch": 0.92, "grad_norm": 1.80738311880303, "learning_rate": 1.782839516135282e-07, "loss": 0.591, "step": 14331 }, { "epoch": 0.92, "grad_norm": 1.5463344680776492, "learning_rate": 1.7800972579811783e-07, "loss": 0.6666, "step": 14332 }, { "epoch": 0.92, "grad_norm": 1.5659434490928728, "learning_rate": 1.777357072217173e-07, "loss": 0.6614, "step": 14333 }, { "epoch": 0.92, "grad_norm": 1.6687805847515185, "learning_rate": 1.7746189589610275e-07, "loss": 0.637, "step": 14334 }, { "epoch": 0.92, "grad_norm": 1.5198091080170648, "learning_rate": 1.7718829183304254e-07, "loss": 0.6109, "step": 14335 }, { "epoch": 0.92, "grad_norm": 1.5265012601496175, "learning_rate": 1.769148950442956e-07, "loss": 0.6797, "step": 14336 }, { "epoch": 0.92, "grad_norm": 1.3499442955311494, "learning_rate": 1.766417055416114e-07, "loss": 0.603, "step": 14337 }, { "epoch": 0.92, "grad_norm": 1.4490863266752683, "learning_rate": 1.7636872333673116e-07, "loss": 0.6141, "step": 14338 }, { "epoch": 0.92, "grad_norm": 1.475328575392821, "learning_rate": 1.7609594844138767e-07, "loss": 0.6426, "step": 14339 }, { "epoch": 0.92, "grad_norm": 1.7277098050749224, "learning_rate": 1.7582338086730377e-07, "loss": 0.7197, "step": 14340 }, { "epoch": 0.92, "grad_norm": 1.477473496902557, "learning_rate": 1.7555102062619454e-07, "loss": 0.6557, "step": 14341 }, { "epoch": 0.92, "grad_norm": 1.68419856238586, "learning_rate": 1.7527886772976456e-07, "loss": 0.6752, "step": 14342 }, { "epoch": 0.92, "grad_norm": 3.4501079836182025, "learning_rate": 1.7500692218971048e-07, "loss": 0.6064, "step": 14343 }, { "epoch": 0.92, "grad_norm": 1.5316674006751265, "learning_rate": 1.7473518401772026e-07, "loss": 0.6784, "step": 14344 }, { "epoch": 0.92, "grad_norm": 1.7123145052061277, "learning_rate": 1.7446365322547231e-07, "loss": 0.7369, "step": 14345 }, { "epoch": 0.92, "grad_norm": 2.7334287629481637, "learning_rate": 1.7419232982463785e-07, "loss": 0.5942, "step": 14346 }, { "epoch": 0.92, "grad_norm": 1.526784953329904, "learning_rate": 1.7392121382687533e-07, "loss": 0.6797, "step": 14347 }, { "epoch": 0.92, "grad_norm": 1.6448507219705382, "learning_rate": 1.736503052438382e-07, "loss": 0.6046, "step": 14348 }, { "epoch": 0.92, "grad_norm": 1.7479247679952101, "learning_rate": 1.733796040871699e-07, "loss": 0.6235, "step": 14349 }, { "epoch": 0.92, "grad_norm": 1.463734157070869, "learning_rate": 1.7310911036850398e-07, "loss": 0.6398, "step": 14350 }, { "epoch": 0.92, "grad_norm": 1.878832519875835, "learning_rate": 1.7283882409946552e-07, "loss": 0.5704, "step": 14351 }, { "epoch": 0.92, "grad_norm": 1.692769215876082, "learning_rate": 1.7256874529167134e-07, "loss": 0.6882, "step": 14352 }, { "epoch": 0.92, "grad_norm": 1.5582027431596956, "learning_rate": 1.7229887395672884e-07, "loss": 0.7042, "step": 14353 }, { "epoch": 0.92, "grad_norm": 1.5668465214192406, "learning_rate": 1.7202921010623596e-07, "loss": 0.7272, "step": 14354 }, { "epoch": 0.92, "grad_norm": 1.1459664290092506, "learning_rate": 1.7175975375178343e-07, "loss": 0.666, "step": 14355 }, { "epoch": 0.92, "grad_norm": 1.6780912372748888, "learning_rate": 1.714905049049498e-07, "loss": 0.7195, "step": 14356 }, { "epoch": 0.92, "grad_norm": 1.694720564422718, "learning_rate": 1.7122146357730908e-07, "loss": 0.7081, "step": 14357 }, { "epoch": 0.92, "grad_norm": 1.6493841542978394, "learning_rate": 1.7095262978042316e-07, "loss": 0.6741, "step": 14358 }, { "epoch": 0.92, "grad_norm": 1.7706691884965038, "learning_rate": 1.7068400352584613e-07, "loss": 0.7592, "step": 14359 }, { "epoch": 0.92, "grad_norm": 1.8441794911203009, "learning_rate": 1.7041558482512265e-07, "loss": 0.7116, "step": 14360 }, { "epoch": 0.92, "grad_norm": 1.7876249024306987, "learning_rate": 1.7014737368978795e-07, "loss": 0.6584, "step": 14361 }, { "epoch": 0.92, "grad_norm": 1.1890558512757825, "learning_rate": 1.6987937013137224e-07, "loss": 0.7009, "step": 14362 }, { "epoch": 0.92, "grad_norm": 1.095335007299647, "learning_rate": 1.6961157416139018e-07, "loss": 0.6612, "step": 14363 }, { "epoch": 0.92, "grad_norm": 1.4549648651800235, "learning_rate": 1.693439857913537e-07, "loss": 0.6096, "step": 14364 }, { "epoch": 0.92, "grad_norm": 1.4554872696170742, "learning_rate": 1.690766050327608e-07, "loss": 0.6775, "step": 14365 }, { "epoch": 0.92, "grad_norm": 1.5992183916376788, "learning_rate": 1.6880943189710508e-07, "loss": 0.6899, "step": 14366 }, { "epoch": 0.92, "grad_norm": 1.5661017038361957, "learning_rate": 1.685424663958679e-07, "loss": 0.738, "step": 14367 }, { "epoch": 0.92, "grad_norm": 1.4202090869375308, "learning_rate": 1.6827570854052345e-07, "loss": 0.5402, "step": 14368 }, { "epoch": 0.92, "grad_norm": 1.9783891560382445, "learning_rate": 1.680091583425364e-07, "loss": 0.6625, "step": 14369 }, { "epoch": 0.92, "grad_norm": 1.3355684032349708, "learning_rate": 1.677428158133615e-07, "loss": 0.6216, "step": 14370 }, { "epoch": 0.92, "grad_norm": 1.5166512032302342, "learning_rate": 1.674766809644479e-07, "loss": 0.6442, "step": 14371 }, { "epoch": 0.92, "grad_norm": 1.9058108859102636, "learning_rate": 1.672107538072304e-07, "loss": 0.6051, "step": 14372 }, { "epoch": 0.92, "grad_norm": 1.4316215994069517, "learning_rate": 1.6694503435314035e-07, "loss": 0.5393, "step": 14373 }, { "epoch": 0.92, "grad_norm": 1.365354007851603, "learning_rate": 1.6667952261359754e-07, "loss": 0.5608, "step": 14374 }, { "epoch": 0.92, "grad_norm": 1.4793206394826506, "learning_rate": 1.6641421860001172e-07, "loss": 0.7035, "step": 14375 }, { "epoch": 0.92, "grad_norm": 1.493367601126948, "learning_rate": 1.6614912232378656e-07, "loss": 0.6624, "step": 14376 }, { "epoch": 0.92, "grad_norm": 1.4864817865321382, "learning_rate": 1.6588423379631458e-07, "loss": 0.6086, "step": 14377 }, { "epoch": 0.92, "grad_norm": 1.508881242622744, "learning_rate": 1.6561955302898114e-07, "loss": 0.5183, "step": 14378 }, { "epoch": 0.92, "grad_norm": 1.1559755037924297, "learning_rate": 1.6535508003315937e-07, "loss": 0.6642, "step": 14379 }, { "epoch": 0.92, "grad_norm": 1.0238108310291067, "learning_rate": 1.650908148202185e-07, "loss": 0.6108, "step": 14380 }, { "epoch": 0.92, "grad_norm": 2.028980375940922, "learning_rate": 1.6482675740151444e-07, "loss": 0.6283, "step": 14381 }, { "epoch": 0.92, "grad_norm": 1.3328435595975359, "learning_rate": 1.6456290778839645e-07, "loss": 0.7257, "step": 14382 }, { "epoch": 0.92, "grad_norm": 1.2652190961588148, "learning_rate": 1.642992659922038e-07, "loss": 0.6615, "step": 14383 }, { "epoch": 0.92, "grad_norm": 1.5693363896624457, "learning_rate": 1.6403583202426689e-07, "loss": 0.6164, "step": 14384 }, { "epoch": 0.92, "grad_norm": 1.3966328813898814, "learning_rate": 1.6377260589590939e-07, "loss": 0.6631, "step": 14385 }, { "epoch": 0.92, "grad_norm": 1.4383418870922278, "learning_rate": 1.6350958761844226e-07, "loss": 0.6441, "step": 14386 }, { "epoch": 0.92, "grad_norm": 3.085958651328051, "learning_rate": 1.632467772031704e-07, "loss": 0.6697, "step": 14387 }, { "epoch": 0.92, "grad_norm": 1.601707405269508, "learning_rate": 1.629841746613886e-07, "loss": 0.5564, "step": 14388 }, { "epoch": 0.92, "grad_norm": 1.6784454189313123, "learning_rate": 1.6272178000438288e-07, "loss": 0.6894, "step": 14389 }, { "epoch": 0.92, "grad_norm": 1.4557432782527082, "learning_rate": 1.6245959324343086e-07, "loss": 0.6779, "step": 14390 }, { "epoch": 0.92, "grad_norm": 1.150323832866659, "learning_rate": 1.621976143898002e-07, "loss": 0.7193, "step": 14391 }, { "epoch": 0.92, "grad_norm": 1.5077050062582686, "learning_rate": 1.6193584345475078e-07, "loss": 0.6171, "step": 14392 }, { "epoch": 0.92, "grad_norm": 1.4939249999977644, "learning_rate": 1.6167428044953138e-07, "loss": 0.6245, "step": 14393 }, { "epoch": 0.92, "grad_norm": 1.634223191895605, "learning_rate": 1.6141292538538634e-07, "loss": 0.646, "step": 14394 }, { "epoch": 0.92, "grad_norm": 1.729767603193709, "learning_rate": 1.6115177827354556e-07, "loss": 0.72, "step": 14395 }, { "epoch": 0.92, "grad_norm": 1.6873600504898192, "learning_rate": 1.6089083912523396e-07, "loss": 0.6969, "step": 14396 }, { "epoch": 0.92, "grad_norm": 1.7368295539035523, "learning_rate": 1.6063010795166533e-07, "loss": 0.6646, "step": 14397 }, { "epoch": 0.92, "grad_norm": 1.4944723076348603, "learning_rate": 1.6036958476404574e-07, "loss": 0.635, "step": 14398 }, { "epoch": 0.92, "grad_norm": 1.66836990858711, "learning_rate": 1.6010926957357232e-07, "loss": 0.6548, "step": 14399 }, { "epoch": 0.92, "grad_norm": 1.3018123840453708, "learning_rate": 1.598491623914322e-07, "loss": 0.5322, "step": 14400 }, { "epoch": 0.92, "grad_norm": 1.5481442542891897, "learning_rate": 1.5958926322880487e-07, "loss": 0.6472, "step": 14401 }, { "epoch": 0.92, "grad_norm": 1.2243353187537882, "learning_rate": 1.5932957209685906e-07, "loss": 0.6142, "step": 14402 }, { "epoch": 0.92, "grad_norm": 1.6619325352496754, "learning_rate": 1.590700890067576e-07, "loss": 0.6976, "step": 14403 }, { "epoch": 0.92, "grad_norm": 1.4447916654146273, "learning_rate": 1.5881081396965093e-07, "loss": 0.5983, "step": 14404 }, { "epoch": 0.92, "grad_norm": 1.3212090983246851, "learning_rate": 1.5855174699668298e-07, "loss": 0.6796, "step": 14405 }, { "epoch": 0.92, "grad_norm": 1.5908235362366219, "learning_rate": 1.5829288809898757e-07, "loss": 0.6296, "step": 14406 }, { "epoch": 0.92, "grad_norm": 1.470770996807109, "learning_rate": 1.580342372876903e-07, "loss": 0.6469, "step": 14407 }, { "epoch": 0.92, "grad_norm": 1.5678943938513032, "learning_rate": 1.577757945739067e-07, "loss": 0.631, "step": 14408 }, { "epoch": 0.92, "grad_norm": 1.7750827619362395, "learning_rate": 1.5751755996874452e-07, "loss": 0.6972, "step": 14409 }, { "epoch": 0.92, "grad_norm": 1.6457743124865711, "learning_rate": 1.5725953348330214e-07, "loss": 0.6876, "step": 14410 }, { "epoch": 0.92, "grad_norm": 1.672969652258527, "learning_rate": 1.5700171512866956e-07, "loss": 0.6712, "step": 14411 }, { "epoch": 0.92, "grad_norm": 1.521273798033137, "learning_rate": 1.5674410491592573e-07, "loss": 0.704, "step": 14412 }, { "epoch": 0.92, "grad_norm": 1.4820363454967387, "learning_rate": 1.5648670285614397e-07, "loss": 0.6645, "step": 14413 }, { "epoch": 0.92, "grad_norm": 1.6780753492934846, "learning_rate": 1.5622950896038603e-07, "loss": 0.7112, "step": 14414 }, { "epoch": 0.92, "grad_norm": 1.7758646159924223, "learning_rate": 1.559725232397058e-07, "loss": 0.6986, "step": 14415 }, { "epoch": 0.92, "grad_norm": 1.4875640310774398, "learning_rate": 1.557157457051478e-07, "loss": 0.6925, "step": 14416 }, { "epoch": 0.92, "grad_norm": 1.5917517622422144, "learning_rate": 1.5545917636774655e-07, "loss": 0.6691, "step": 14417 }, { "epoch": 0.92, "grad_norm": 1.563238541131065, "learning_rate": 1.5520281523853208e-07, "loss": 0.5612, "step": 14418 }, { "epoch": 0.92, "grad_norm": 1.6096227014295454, "learning_rate": 1.5494666232851896e-07, "loss": 0.6971, "step": 14419 }, { "epoch": 0.92, "grad_norm": 3.105080399718023, "learning_rate": 1.5469071764871834e-07, "loss": 0.557, "step": 14420 }, { "epoch": 0.92, "grad_norm": 1.483510412927246, "learning_rate": 1.5443498121012813e-07, "loss": 0.6487, "step": 14421 }, { "epoch": 0.92, "grad_norm": 1.6744554738826405, "learning_rate": 1.5417945302374116e-07, "loss": 0.6335, "step": 14422 }, { "epoch": 0.92, "grad_norm": 1.6066256349851067, "learning_rate": 1.5392413310053866e-07, "loss": 0.5999, "step": 14423 }, { "epoch": 0.92, "grad_norm": 1.7976998195349188, "learning_rate": 1.536690214514941e-07, "loss": 0.6459, "step": 14424 }, { "epoch": 0.92, "grad_norm": 2.0791931907745944, "learning_rate": 1.5341411808757146e-07, "loss": 0.6446, "step": 14425 }, { "epoch": 0.92, "grad_norm": 1.3459326521710382, "learning_rate": 1.531594230197253e-07, "loss": 0.635, "step": 14426 }, { "epoch": 0.92, "grad_norm": 1.165863558829678, "learning_rate": 1.5290493625890413e-07, "loss": 0.6973, "step": 14427 }, { "epoch": 0.92, "grad_norm": 1.43531988733163, "learning_rate": 1.5265065781604193e-07, "loss": 0.6475, "step": 14428 }, { "epoch": 0.92, "grad_norm": 1.5943121624081689, "learning_rate": 1.5239658770206945e-07, "loss": 0.5806, "step": 14429 }, { "epoch": 0.92, "grad_norm": 1.632630805310865, "learning_rate": 1.521427259279057e-07, "loss": 0.5971, "step": 14430 }, { "epoch": 0.92, "grad_norm": 1.300479569754335, "learning_rate": 1.5188907250446028e-07, "loss": 0.6218, "step": 14431 }, { "epoch": 0.92, "grad_norm": 2.0365495008333827, "learning_rate": 1.5163562744263561e-07, "loss": 0.7227, "step": 14432 }, { "epoch": 0.92, "grad_norm": 1.5198089491712632, "learning_rate": 1.513823907533235e-07, "loss": 0.7113, "step": 14433 }, { "epoch": 0.92, "grad_norm": 1.6459061640474797, "learning_rate": 1.5112936244740862e-07, "loss": 0.7154, "step": 14434 }, { "epoch": 0.92, "grad_norm": 1.3569876766147437, "learning_rate": 1.508765425357639e-07, "loss": 0.6883, "step": 14435 }, { "epoch": 0.92, "grad_norm": 1.7168858442030521, "learning_rate": 1.5062393102925676e-07, "loss": 0.691, "step": 14436 }, { "epoch": 0.92, "grad_norm": 1.40772829032052, "learning_rate": 1.5037152793874244e-07, "loss": 0.6277, "step": 14437 }, { "epoch": 0.92, "grad_norm": 1.2010931038708124, "learning_rate": 1.5011933327507e-07, "loss": 0.6517, "step": 14438 }, { "epoch": 0.92, "grad_norm": 1.5953986347832085, "learning_rate": 1.4986734704907745e-07, "loss": 0.6759, "step": 14439 }, { "epoch": 0.92, "grad_norm": 1.5964798093075436, "learning_rate": 1.4961556927159392e-07, "loss": 0.7013, "step": 14440 }, { "epoch": 0.92, "grad_norm": 1.5001545679303023, "learning_rate": 1.4936399995344298e-07, "loss": 0.5724, "step": 14441 }, { "epoch": 0.92, "grad_norm": 1.5789919544662487, "learning_rate": 1.4911263910543316e-07, "loss": 0.6737, "step": 14442 }, { "epoch": 0.92, "grad_norm": 1.409495100061632, "learning_rate": 1.4886148673836975e-07, "loss": 0.6324, "step": 14443 }, { "epoch": 0.92, "grad_norm": 1.7024767112842243, "learning_rate": 1.4861054286304522e-07, "loss": 0.6194, "step": 14444 }, { "epoch": 0.92, "grad_norm": 1.369756906436285, "learning_rate": 1.4835980749024592e-07, "loss": 0.6312, "step": 14445 }, { "epoch": 0.92, "grad_norm": 1.6720383870141633, "learning_rate": 1.481092806307477e-07, "loss": 0.7986, "step": 14446 }, { "epoch": 0.92, "grad_norm": 1.886379862001914, "learning_rate": 1.4785896229531692e-07, "loss": 0.6054, "step": 14447 }, { "epoch": 0.92, "grad_norm": 1.5271954509596695, "learning_rate": 1.476088524947128e-07, "loss": 0.6846, "step": 14448 }, { "epoch": 0.92, "grad_norm": 1.5460850326569149, "learning_rate": 1.473589512396828e-07, "loss": 0.7227, "step": 14449 }, { "epoch": 0.92, "grad_norm": 1.5892543095677556, "learning_rate": 1.4710925854096946e-07, "loss": 0.6748, "step": 14450 }, { "epoch": 0.92, "grad_norm": 1.898015699659274, "learning_rate": 1.46859774409302e-07, "loss": 0.7148, "step": 14451 }, { "epoch": 0.93, "grad_norm": 1.6933239087555136, "learning_rate": 1.46610498855404e-07, "loss": 0.6762, "step": 14452 }, { "epoch": 0.93, "grad_norm": 1.8609067249588838, "learning_rate": 1.4636143188998808e-07, "loss": 0.6529, "step": 14453 }, { "epoch": 0.93, "grad_norm": 1.2106930122972577, "learning_rate": 1.461125735237595e-07, "loss": 0.6612, "step": 14454 }, { "epoch": 0.93, "grad_norm": 1.6373325286791538, "learning_rate": 1.4586392376741254e-07, "loss": 0.6702, "step": 14455 }, { "epoch": 0.93, "grad_norm": 1.681499661086597, "learning_rate": 1.4561548263163472e-07, "loss": 0.5493, "step": 14456 }, { "epoch": 0.93, "grad_norm": 1.5035529185005527, "learning_rate": 1.4536725012710252e-07, "loss": 0.5466, "step": 14457 }, { "epoch": 0.93, "grad_norm": 1.5096505492053733, "learning_rate": 1.451192262644846e-07, "loss": 0.6194, "step": 14458 }, { "epoch": 0.93, "grad_norm": 1.8880647290123012, "learning_rate": 1.4487141105444136e-07, "loss": 0.6107, "step": 14459 }, { "epoch": 0.93, "grad_norm": 1.5666012051983529, "learning_rate": 1.446238045076226e-07, "loss": 0.6979, "step": 14460 }, { "epoch": 0.93, "grad_norm": 1.8430016714294215, "learning_rate": 1.4437640663467034e-07, "loss": 0.6534, "step": 14461 }, { "epoch": 0.93, "grad_norm": 1.6434023282623649, "learning_rate": 1.4412921744621722e-07, "loss": 0.7227, "step": 14462 }, { "epoch": 0.93, "grad_norm": 1.2905459840157576, "learning_rate": 1.4388223695288695e-07, "loss": 0.5183, "step": 14463 }, { "epoch": 0.93, "grad_norm": 1.449992874151284, "learning_rate": 1.4363546516529326e-07, "loss": 0.5704, "step": 14464 }, { "epoch": 0.93, "grad_norm": 1.1666059102356419, "learning_rate": 1.433889020940432e-07, "loss": 0.6997, "step": 14465 }, { "epoch": 0.93, "grad_norm": 1.5930894545413012, "learning_rate": 1.431425477497328e-07, "loss": 0.6186, "step": 14466 }, { "epoch": 0.93, "grad_norm": 1.5298744799697939, "learning_rate": 1.4289640214294963e-07, "loss": 0.65, "step": 14467 }, { "epoch": 0.93, "grad_norm": 1.6003807994666615, "learning_rate": 1.4265046528427362e-07, "loss": 0.6173, "step": 14468 }, { "epoch": 0.93, "grad_norm": 2.0955576654439745, "learning_rate": 1.424047371842735e-07, "loss": 0.6358, "step": 14469 }, { "epoch": 0.93, "grad_norm": 1.8180180179037446, "learning_rate": 1.4215921785351083e-07, "loss": 0.557, "step": 14470 }, { "epoch": 0.93, "grad_norm": 1.561222989564129, "learning_rate": 1.4191390730253718e-07, "loss": 0.6903, "step": 14471 }, { "epoch": 0.93, "grad_norm": 1.7425442904086308, "learning_rate": 1.416688055418952e-07, "loss": 0.6337, "step": 14472 }, { "epoch": 0.93, "grad_norm": 1.1626381257450196, "learning_rate": 1.4142391258211985e-07, "loss": 0.7221, "step": 14473 }, { "epoch": 0.93, "grad_norm": 1.125952397609305, "learning_rate": 1.4117922843373487e-07, "loss": 0.6981, "step": 14474 }, { "epoch": 0.93, "grad_norm": 1.5752780015821932, "learning_rate": 1.409347531072569e-07, "loss": 0.6222, "step": 14475 }, { "epoch": 0.93, "grad_norm": 1.4203437645123913, "learning_rate": 1.4069048661319308e-07, "loss": 0.645, "step": 14476 }, { "epoch": 0.93, "grad_norm": 1.6991129425403955, "learning_rate": 1.4044642896204107e-07, "loss": 0.624, "step": 14477 }, { "epoch": 0.93, "grad_norm": 1.5074838857739348, "learning_rate": 1.402025801642909e-07, "loss": 0.653, "step": 14478 }, { "epoch": 0.93, "grad_norm": 1.5983956035313356, "learning_rate": 1.3995894023042135e-07, "loss": 0.6886, "step": 14479 }, { "epoch": 0.93, "grad_norm": 1.4815286501299756, "learning_rate": 1.397155091709046e-07, "loss": 0.7068, "step": 14480 }, { "epoch": 0.93, "grad_norm": 1.1930701800503107, "learning_rate": 1.394722869962023e-07, "loss": 0.607, "step": 14481 }, { "epoch": 0.93, "grad_norm": 1.1414441547828658, "learning_rate": 1.392292737167672e-07, "loss": 0.6827, "step": 14482 }, { "epoch": 0.93, "grad_norm": 1.561998732693169, "learning_rate": 1.3898646934304538e-07, "loss": 0.6989, "step": 14483 }, { "epoch": 0.93, "grad_norm": 1.4471225073257532, "learning_rate": 1.3874387388546906e-07, "loss": 0.6753, "step": 14484 }, { "epoch": 0.93, "grad_norm": 1.7310930475814805, "learning_rate": 1.3850148735446767e-07, "loss": 0.7466, "step": 14485 }, { "epoch": 0.93, "grad_norm": 1.625610635237539, "learning_rate": 1.3825930976045565e-07, "loss": 0.5861, "step": 14486 }, { "epoch": 0.93, "grad_norm": 1.4376464710526442, "learning_rate": 1.38017341113843e-07, "loss": 0.7186, "step": 14487 }, { "epoch": 0.93, "grad_norm": 1.4856714773757462, "learning_rate": 1.3777558142502868e-07, "loss": 0.6395, "step": 14488 }, { "epoch": 0.93, "grad_norm": 1.4695433111658345, "learning_rate": 1.3753403070440263e-07, "loss": 0.6441, "step": 14489 }, { "epoch": 0.93, "grad_norm": 1.509324149161056, "learning_rate": 1.3729268896234716e-07, "loss": 0.5811, "step": 14490 }, { "epoch": 0.93, "grad_norm": 1.6580470449686195, "learning_rate": 1.3705155620923337e-07, "loss": 0.6286, "step": 14491 }, { "epoch": 0.93, "grad_norm": 1.038576280403863, "learning_rate": 1.368106324554258e-07, "loss": 0.6477, "step": 14492 }, { "epoch": 0.93, "grad_norm": 1.0953844717049759, "learning_rate": 1.3656991771127781e-07, "loss": 0.6293, "step": 14493 }, { "epoch": 0.93, "grad_norm": 1.6192757056095006, "learning_rate": 1.3632941198713557e-07, "loss": 0.7545, "step": 14494 }, { "epoch": 0.93, "grad_norm": 1.5768209906759583, "learning_rate": 1.3608911529333467e-07, "loss": 0.5583, "step": 14495 }, { "epoch": 0.93, "grad_norm": 1.9101547268620755, "learning_rate": 1.3584902764020302e-07, "loss": 0.6833, "step": 14496 }, { "epoch": 0.93, "grad_norm": 1.6552657356313598, "learning_rate": 1.3560914903806065e-07, "loss": 0.7307, "step": 14497 }, { "epoch": 0.93, "grad_norm": 1.5844973612765962, "learning_rate": 1.353694794972138e-07, "loss": 0.6258, "step": 14498 }, { "epoch": 0.93, "grad_norm": 1.7658965581943642, "learning_rate": 1.3513001902796642e-07, "loss": 0.6544, "step": 14499 }, { "epoch": 0.93, "grad_norm": 1.652170087272544, "learning_rate": 1.3489076764060693e-07, "loss": 0.6721, "step": 14500 }, { "epoch": 0.93, "grad_norm": 1.5378492303858389, "learning_rate": 1.3465172534541936e-07, "loss": 0.6522, "step": 14501 }, { "epoch": 0.93, "grad_norm": 1.468490102012958, "learning_rate": 1.3441289215267772e-07, "loss": 0.6379, "step": 14502 }, { "epoch": 0.93, "grad_norm": 1.0859482099351299, "learning_rate": 1.34174268072646e-07, "loss": 0.6665, "step": 14503 }, { "epoch": 0.93, "grad_norm": 1.5478860301630086, "learning_rate": 1.3393585311557933e-07, "loss": 0.7113, "step": 14504 }, { "epoch": 0.93, "grad_norm": 1.40769743249266, "learning_rate": 1.3369764729172453e-07, "loss": 0.5642, "step": 14505 }, { "epoch": 0.93, "grad_norm": 1.733804910816852, "learning_rate": 1.3345965061132004e-07, "loss": 0.65, "step": 14506 }, { "epoch": 0.93, "grad_norm": 1.6443670781069841, "learning_rate": 1.3322186308459274e-07, "loss": 0.6601, "step": 14507 }, { "epoch": 0.93, "grad_norm": 1.6319634381992236, "learning_rate": 1.329842847217644e-07, "loss": 0.6741, "step": 14508 }, { "epoch": 0.93, "grad_norm": 1.768954305143704, "learning_rate": 1.3274691553304352e-07, "loss": 0.6549, "step": 14509 }, { "epoch": 0.93, "grad_norm": 1.5766235614040645, "learning_rate": 1.325097555286331e-07, "loss": 0.6091, "step": 14510 }, { "epoch": 0.93, "grad_norm": 1.316179184620022, "learning_rate": 1.322728047187255e-07, "loss": 0.5998, "step": 14511 }, { "epoch": 0.93, "grad_norm": 1.6262719724666923, "learning_rate": 1.3203606311350426e-07, "loss": 0.7094, "step": 14512 }, { "epoch": 0.93, "grad_norm": 1.5607217322563016, "learning_rate": 1.31799530723144e-07, "loss": 0.6566, "step": 14513 }, { "epoch": 0.93, "grad_norm": 1.5830428983211162, "learning_rate": 1.3156320755780993e-07, "loss": 0.6334, "step": 14514 }, { "epoch": 0.93, "grad_norm": 1.9221003008834843, "learning_rate": 1.3132709362766006e-07, "loss": 0.6623, "step": 14515 }, { "epoch": 0.93, "grad_norm": 1.8554113846957234, "learning_rate": 1.3109118894284012e-07, "loss": 0.6071, "step": 14516 }, { "epoch": 0.93, "grad_norm": 1.7955055656409662, "learning_rate": 1.308554935134909e-07, "loss": 0.63, "step": 14517 }, { "epoch": 0.93, "grad_norm": 1.461905877959692, "learning_rate": 1.3062000734974045e-07, "loss": 0.6615, "step": 14518 }, { "epoch": 0.93, "grad_norm": 1.4782432902602753, "learning_rate": 1.3038473046171063e-07, "loss": 0.6857, "step": 14519 }, { "epoch": 0.93, "grad_norm": 1.0049113359652608, "learning_rate": 1.3014966285951226e-07, "loss": 0.5712, "step": 14520 }, { "epoch": 0.93, "grad_norm": 1.560865402368005, "learning_rate": 1.299148045532489e-07, "loss": 0.7091, "step": 14521 }, { "epoch": 0.93, "grad_norm": 1.8451567674719727, "learning_rate": 1.2968015555301305e-07, "loss": 0.6113, "step": 14522 }, { "epoch": 0.93, "grad_norm": 1.6273887347333809, "learning_rate": 1.2944571586888998e-07, "loss": 0.6984, "step": 14523 }, { "epoch": 0.93, "grad_norm": 1.5053407301234736, "learning_rate": 1.2921148551095663e-07, "loss": 0.6535, "step": 14524 }, { "epoch": 0.93, "grad_norm": 1.47099232207317, "learning_rate": 1.2897746448927828e-07, "loss": 0.6231, "step": 14525 }, { "epoch": 0.93, "grad_norm": 1.0571935320778465, "learning_rate": 1.28743652813913e-07, "loss": 0.6475, "step": 14526 }, { "epoch": 0.93, "grad_norm": 1.0191395654534434, "learning_rate": 1.2851005049490939e-07, "loss": 0.6256, "step": 14527 }, { "epoch": 0.93, "grad_norm": 1.788407799598061, "learning_rate": 1.282766575423078e-07, "loss": 0.7084, "step": 14528 }, { "epoch": 0.93, "grad_norm": 1.5983412575986704, "learning_rate": 1.2804347396613848e-07, "loss": 0.6617, "step": 14529 }, { "epoch": 0.93, "grad_norm": 1.7766529577259167, "learning_rate": 1.278104997764229e-07, "loss": 0.7426, "step": 14530 }, { "epoch": 0.93, "grad_norm": 1.4305714999686654, "learning_rate": 1.2757773498317416e-07, "loss": 0.6168, "step": 14531 }, { "epoch": 0.93, "grad_norm": 1.5227781268836318, "learning_rate": 1.2734517959639647e-07, "loss": 0.6314, "step": 14532 }, { "epoch": 0.93, "grad_norm": 1.5635876367133852, "learning_rate": 1.2711283362608351e-07, "loss": 0.6784, "step": 14533 }, { "epoch": 0.93, "grad_norm": 1.4950953608235382, "learning_rate": 1.2688069708222228e-07, "loss": 0.6302, "step": 14534 }, { "epoch": 0.93, "grad_norm": 1.678507433546554, "learning_rate": 1.266487699747887e-07, "loss": 0.7337, "step": 14535 }, { "epoch": 0.93, "grad_norm": 1.043699856461697, "learning_rate": 1.2641705231375034e-07, "loss": 0.6553, "step": 14536 }, { "epoch": 0.93, "grad_norm": 1.4570789955986043, "learning_rate": 1.2618554410906648e-07, "loss": 0.6898, "step": 14537 }, { "epoch": 0.93, "grad_norm": 1.6614515209871046, "learning_rate": 1.2595424537068635e-07, "loss": 0.6253, "step": 14538 }, { "epoch": 0.93, "grad_norm": 1.981744543224429, "learning_rate": 1.2572315610855201e-07, "loss": 0.6685, "step": 14539 }, { "epoch": 0.93, "grad_norm": 1.8282682736424245, "learning_rate": 1.2549227633259275e-07, "loss": 0.6126, "step": 14540 }, { "epoch": 0.93, "grad_norm": 1.5106812338899915, "learning_rate": 1.252616060527334e-07, "loss": 0.6697, "step": 14541 }, { "epoch": 0.93, "grad_norm": 1.0684968581489969, "learning_rate": 1.250311452788866e-07, "loss": 0.5267, "step": 14542 }, { "epoch": 0.93, "grad_norm": 1.5701477663368397, "learning_rate": 1.248008940209583e-07, "loss": 0.6254, "step": 14543 }, { "epoch": 0.93, "grad_norm": 1.3367794585235893, "learning_rate": 1.245708522888428e-07, "loss": 0.6307, "step": 14544 }, { "epoch": 0.93, "grad_norm": 1.548642608603821, "learning_rate": 1.243410200924272e-07, "loss": 0.6852, "step": 14545 }, { "epoch": 0.93, "grad_norm": 1.7620713848950114, "learning_rate": 1.2411139744158972e-07, "loss": 0.7707, "step": 14546 }, { "epoch": 0.93, "grad_norm": 1.719396374916013, "learning_rate": 1.2388198434619803e-07, "loss": 0.6555, "step": 14547 }, { "epoch": 0.93, "grad_norm": 1.5477923608133262, "learning_rate": 1.2365278081611365e-07, "loss": 0.6309, "step": 14548 }, { "epoch": 0.93, "grad_norm": 1.4666382245303942, "learning_rate": 1.2342378686118538e-07, "loss": 0.6785, "step": 14549 }, { "epoch": 0.93, "grad_norm": 1.311964895232065, "learning_rate": 1.2319500249125594e-07, "loss": 0.5554, "step": 14550 }, { "epoch": 0.93, "grad_norm": 1.5212367303535077, "learning_rate": 1.2296642771615741e-07, "loss": 0.6278, "step": 14551 }, { "epoch": 0.93, "grad_norm": 1.3612251951494345, "learning_rate": 1.227380625457142e-07, "loss": 0.7012, "step": 14552 }, { "epoch": 0.93, "grad_norm": 1.9503032751413765, "learning_rate": 1.2250990698974009e-07, "loss": 0.6864, "step": 14553 }, { "epoch": 0.93, "grad_norm": 2.5064052539453128, "learning_rate": 1.2228196105804113e-07, "loss": 0.6455, "step": 14554 }, { "epoch": 0.93, "grad_norm": 1.6594176484578145, "learning_rate": 1.2205422476041452e-07, "loss": 0.6522, "step": 14555 }, { "epoch": 0.93, "grad_norm": 1.5968771869204565, "learning_rate": 1.2182669810664683e-07, "loss": 0.5965, "step": 14556 }, { "epoch": 0.93, "grad_norm": 1.0335167011119242, "learning_rate": 1.2159938110651803e-07, "loss": 0.6413, "step": 14557 }, { "epoch": 0.93, "grad_norm": 1.6845952954001386, "learning_rate": 1.2137227376979587e-07, "loss": 0.603, "step": 14558 }, { "epoch": 0.93, "grad_norm": 1.2953879880798538, "learning_rate": 1.2114537610624255e-07, "loss": 0.6088, "step": 14559 }, { "epoch": 0.93, "grad_norm": 1.3965073251651396, "learning_rate": 1.2091868812560859e-07, "loss": 0.6293, "step": 14560 }, { "epoch": 0.93, "grad_norm": 1.012403700633823, "learning_rate": 1.206922098376373e-07, "loss": 0.5669, "step": 14561 }, { "epoch": 0.93, "grad_norm": 1.1258582794278618, "learning_rate": 1.2046594125206257e-07, "loss": 0.6136, "step": 14562 }, { "epoch": 0.93, "grad_norm": 1.4961611033850377, "learning_rate": 1.2023988237860718e-07, "loss": 0.6381, "step": 14563 }, { "epoch": 0.93, "grad_norm": 1.6264032937143136, "learning_rate": 1.2001403322698947e-07, "loss": 0.6626, "step": 14564 }, { "epoch": 0.93, "grad_norm": 1.4488628256552176, "learning_rate": 1.1978839380691277e-07, "loss": 0.6634, "step": 14565 }, { "epoch": 0.93, "grad_norm": 1.5164486025455644, "learning_rate": 1.195629641280771e-07, "loss": 0.7211, "step": 14566 }, { "epoch": 0.93, "grad_norm": 1.4522630095280382, "learning_rate": 1.1933774420016974e-07, "loss": 0.5835, "step": 14567 }, { "epoch": 0.93, "grad_norm": 1.5945547498010417, "learning_rate": 1.1911273403287016e-07, "loss": 0.6507, "step": 14568 }, { "epoch": 0.93, "grad_norm": 1.8072450891117877, "learning_rate": 1.188879336358495e-07, "loss": 0.605, "step": 14569 }, { "epoch": 0.93, "grad_norm": 2.152853790225602, "learning_rate": 1.1866334301876837e-07, "loss": 0.6138, "step": 14570 }, { "epoch": 0.93, "grad_norm": 1.4362196237212008, "learning_rate": 1.184389621912807e-07, "loss": 0.6576, "step": 14571 }, { "epoch": 0.93, "grad_norm": 1.4630236794808686, "learning_rate": 1.182147911630277e-07, "loss": 0.5904, "step": 14572 }, { "epoch": 0.93, "grad_norm": 1.8891088744004392, "learning_rate": 1.1799082994364553e-07, "loss": 0.7414, "step": 14573 }, { "epoch": 0.93, "grad_norm": 1.7642768703915888, "learning_rate": 1.177670785427587e-07, "loss": 0.7382, "step": 14574 }, { "epoch": 0.93, "grad_norm": 1.5822832496570283, "learning_rate": 1.17543536969984e-07, "loss": 0.7144, "step": 14575 }, { "epoch": 0.93, "grad_norm": 1.561837226124696, "learning_rate": 1.1732020523492871e-07, "loss": 0.6933, "step": 14576 }, { "epoch": 0.93, "grad_norm": 1.527080794532067, "learning_rate": 1.1709708334719128e-07, "loss": 0.6957, "step": 14577 }, { "epoch": 0.93, "grad_norm": 1.6993352078684707, "learning_rate": 1.1687417131636014e-07, "loss": 0.6672, "step": 14578 }, { "epoch": 0.93, "grad_norm": 1.431961240265476, "learning_rate": 1.1665146915201652e-07, "loss": 0.61, "step": 14579 }, { "epoch": 0.93, "grad_norm": 2.379056428332027, "learning_rate": 1.1642897686373167e-07, "loss": 0.6309, "step": 14580 }, { "epoch": 0.93, "grad_norm": 1.4803915138488555, "learning_rate": 1.1620669446106735e-07, "loss": 0.762, "step": 14581 }, { "epoch": 0.93, "grad_norm": 1.625933066831384, "learning_rate": 1.1598462195357707e-07, "loss": 0.7058, "step": 14582 }, { "epoch": 0.93, "grad_norm": 1.4969919681508237, "learning_rate": 1.157627593508054e-07, "loss": 0.6827, "step": 14583 }, { "epoch": 0.93, "grad_norm": 1.459007829920002, "learning_rate": 1.1554110666228691e-07, "loss": 0.6487, "step": 14584 }, { "epoch": 0.93, "grad_norm": 1.2739828484629456, "learning_rate": 1.153196638975479e-07, "loss": 0.663, "step": 14585 }, { "epoch": 0.93, "grad_norm": 1.4387085216528697, "learning_rate": 1.1509843106610574e-07, "loss": 0.697, "step": 14586 }, { "epoch": 0.93, "grad_norm": 1.0549186152613266, "learning_rate": 1.148774081774684e-07, "loss": 0.6195, "step": 14587 }, { "epoch": 0.93, "grad_norm": 2.1427888271852105, "learning_rate": 1.1465659524113438e-07, "loss": 0.623, "step": 14588 }, { "epoch": 0.93, "grad_norm": 1.520656732719959, "learning_rate": 1.1443599226659497e-07, "loss": 0.647, "step": 14589 }, { "epoch": 0.93, "grad_norm": 1.2579502327633896, "learning_rate": 1.1421559926333092e-07, "loss": 0.625, "step": 14590 }, { "epoch": 0.93, "grad_norm": 1.463878554553075, "learning_rate": 1.1399541624081357e-07, "loss": 0.634, "step": 14591 }, { "epoch": 0.93, "grad_norm": 1.5061733589156057, "learning_rate": 1.1377544320850641e-07, "loss": 0.5985, "step": 14592 }, { "epoch": 0.93, "grad_norm": 2.950882572568967, "learning_rate": 1.1355568017586305e-07, "loss": 0.6487, "step": 14593 }, { "epoch": 0.93, "grad_norm": 1.8695186611656995, "learning_rate": 1.1333612715232923e-07, "loss": 0.6367, "step": 14594 }, { "epoch": 0.93, "grad_norm": 1.8507458385645328, "learning_rate": 1.1311678414734018e-07, "loss": 0.6502, "step": 14595 }, { "epoch": 0.93, "grad_norm": 1.5743762038126254, "learning_rate": 1.1289765117032226e-07, "loss": 0.6717, "step": 14596 }, { "epoch": 0.93, "grad_norm": 1.5986939245984835, "learning_rate": 1.1267872823069459e-07, "loss": 0.6837, "step": 14597 }, { "epoch": 0.93, "grad_norm": 1.835139773733876, "learning_rate": 1.1246001533786576e-07, "loss": 0.6159, "step": 14598 }, { "epoch": 0.93, "grad_norm": 1.5019303019724413, "learning_rate": 1.1224151250123549e-07, "loss": 0.7103, "step": 14599 }, { "epoch": 0.93, "grad_norm": 1.5063349103005332, "learning_rate": 1.1202321973019403e-07, "loss": 0.6651, "step": 14600 }, { "epoch": 0.93, "grad_norm": 1.546270767980585, "learning_rate": 1.1180513703412388e-07, "loss": 0.5869, "step": 14601 }, { "epoch": 0.93, "grad_norm": 1.7053464478552922, "learning_rate": 1.1158726442239698e-07, "loss": 0.6174, "step": 14602 }, { "epoch": 0.93, "grad_norm": 2.8288169086742587, "learning_rate": 1.1136960190437751e-07, "loss": 0.6976, "step": 14603 }, { "epoch": 0.93, "grad_norm": 1.6650620146038984, "learning_rate": 1.1115214948942077e-07, "loss": 0.6836, "step": 14604 }, { "epoch": 0.93, "grad_norm": 1.6278945779918852, "learning_rate": 1.1093490718687094e-07, "loss": 0.6925, "step": 14605 }, { "epoch": 0.93, "grad_norm": 1.0423426119616035, "learning_rate": 1.1071787500606557e-07, "loss": 0.5975, "step": 14606 }, { "epoch": 0.93, "grad_norm": 1.5047211699270195, "learning_rate": 1.1050105295633274e-07, "loss": 0.5994, "step": 14607 }, { "epoch": 0.94, "grad_norm": 1.4896520818471795, "learning_rate": 1.1028444104698998e-07, "loss": 0.5301, "step": 14608 }, { "epoch": 0.94, "grad_norm": 1.7981884758331446, "learning_rate": 1.1006803928734711e-07, "loss": 0.7122, "step": 14609 }, { "epoch": 0.94, "grad_norm": 1.8812941957095646, "learning_rate": 1.0985184768670443e-07, "loss": 0.7043, "step": 14610 }, { "epoch": 0.94, "grad_norm": 1.5499787369686986, "learning_rate": 1.0963586625435507e-07, "loss": 0.6217, "step": 14611 }, { "epoch": 0.94, "grad_norm": 1.8572115317490805, "learning_rate": 1.0942009499957884e-07, "loss": 0.6182, "step": 14612 }, { "epoch": 0.94, "grad_norm": 1.5053186417489413, "learning_rate": 1.0920453393165109e-07, "loss": 0.6582, "step": 14613 }, { "epoch": 0.94, "grad_norm": 2.503101883790947, "learning_rate": 1.0898918305983496e-07, "loss": 0.6383, "step": 14614 }, { "epoch": 0.94, "grad_norm": 1.0750056189038908, "learning_rate": 1.087740423933864e-07, "loss": 0.6042, "step": 14615 }, { "epoch": 0.94, "grad_norm": 1.5501616734639045, "learning_rate": 1.0855911194155189e-07, "loss": 0.6767, "step": 14616 }, { "epoch": 0.94, "grad_norm": 1.4460235966664567, "learning_rate": 1.0834439171356848e-07, "loss": 0.6876, "step": 14617 }, { "epoch": 0.94, "grad_norm": 1.70193800172204, "learning_rate": 1.0812988171866434e-07, "loss": 0.644, "step": 14618 }, { "epoch": 0.94, "grad_norm": 1.6504911289610569, "learning_rate": 1.0791558196605823e-07, "loss": 0.5964, "step": 14619 }, { "epoch": 0.94, "grad_norm": 1.885394571468726, "learning_rate": 1.0770149246496109e-07, "loss": 0.6058, "step": 14620 }, { "epoch": 0.94, "grad_norm": 1.671670653153294, "learning_rate": 1.0748761322457334e-07, "loss": 0.6514, "step": 14621 }, { "epoch": 0.94, "grad_norm": 1.4539303068155964, "learning_rate": 1.0727394425408766e-07, "loss": 0.6341, "step": 14622 }, { "epoch": 0.94, "grad_norm": 2.107460610686153, "learning_rate": 1.0706048556268667e-07, "loss": 0.6608, "step": 14623 }, { "epoch": 0.94, "grad_norm": 1.432110857099566, "learning_rate": 1.0684723715954471e-07, "loss": 0.6087, "step": 14624 }, { "epoch": 0.94, "grad_norm": 1.481250190594041, "learning_rate": 1.0663419905382666e-07, "loss": 0.6951, "step": 14625 }, { "epoch": 0.94, "grad_norm": 1.6118719857851274, "learning_rate": 1.0642137125468743e-07, "loss": 0.6241, "step": 14626 }, { "epoch": 0.94, "grad_norm": 1.4477941335268958, "learning_rate": 1.0620875377127637e-07, "loss": 0.7097, "step": 14627 }, { "epoch": 0.94, "grad_norm": 1.6687126524223543, "learning_rate": 1.0599634661272839e-07, "loss": 0.6241, "step": 14628 }, { "epoch": 0.94, "grad_norm": 1.7275157913664938, "learning_rate": 1.0578414978817508e-07, "loss": 0.6919, "step": 14629 }, { "epoch": 0.94, "grad_norm": 1.9128516810327296, "learning_rate": 1.0557216330673359e-07, "loss": 0.7358, "step": 14630 }, { "epoch": 0.94, "grad_norm": 1.7441738744838153, "learning_rate": 1.0536038717751607e-07, "loss": 0.6468, "step": 14631 }, { "epoch": 0.94, "grad_norm": 1.4385199903762729, "learning_rate": 1.0514882140962468e-07, "loss": 0.728, "step": 14632 }, { "epoch": 0.94, "grad_norm": 1.639801433767382, "learning_rate": 1.0493746601215105e-07, "loss": 0.7237, "step": 14633 }, { "epoch": 0.94, "grad_norm": 1.73259954794315, "learning_rate": 1.0472632099417957e-07, "loss": 0.6185, "step": 14634 }, { "epoch": 0.94, "grad_norm": 1.7003456254806206, "learning_rate": 1.0451538636478353e-07, "loss": 0.6245, "step": 14635 }, { "epoch": 0.94, "grad_norm": 1.4996189235319113, "learning_rate": 1.043046621330307e-07, "loss": 0.6203, "step": 14636 }, { "epoch": 0.94, "grad_norm": 0.9647226683060036, "learning_rate": 1.0409414830797493e-07, "loss": 0.6003, "step": 14637 }, { "epoch": 0.94, "grad_norm": 1.7530692718381744, "learning_rate": 1.0388384489866565e-07, "loss": 0.6161, "step": 14638 }, { "epoch": 0.94, "grad_norm": 1.5616309557208672, "learning_rate": 1.0367375191414064e-07, "loss": 0.6896, "step": 14639 }, { "epoch": 0.94, "grad_norm": 1.3832046131911584, "learning_rate": 1.0346386936342878e-07, "loss": 0.6865, "step": 14640 }, { "epoch": 0.94, "grad_norm": 1.4288372048506153, "learning_rate": 1.032541972555512e-07, "loss": 0.5561, "step": 14641 }, { "epoch": 0.94, "grad_norm": 1.4800110111924552, "learning_rate": 1.0304473559951844e-07, "loss": 0.665, "step": 14642 }, { "epoch": 0.94, "grad_norm": 1.5951002127209384, "learning_rate": 1.0283548440433332e-07, "loss": 0.6431, "step": 14643 }, { "epoch": 0.94, "grad_norm": 1.558519001474136, "learning_rate": 1.0262644367898811e-07, "loss": 0.5485, "step": 14644 }, { "epoch": 0.94, "grad_norm": 1.4712748869622247, "learning_rate": 1.0241761343246781e-07, "loss": 0.6363, "step": 14645 }, { "epoch": 0.94, "grad_norm": 2.7910517746245893, "learning_rate": 1.0220899367374748e-07, "loss": 0.7152, "step": 14646 }, { "epoch": 0.94, "grad_norm": 1.5544519700191222, "learning_rate": 1.0200058441179272e-07, "loss": 0.6862, "step": 14647 }, { "epoch": 0.94, "grad_norm": 1.4727473969602753, "learning_rate": 1.0179238565556081e-07, "loss": 0.6016, "step": 14648 }, { "epoch": 0.94, "grad_norm": 1.4275138965219643, "learning_rate": 1.0158439741399961e-07, "loss": 0.518, "step": 14649 }, { "epoch": 0.94, "grad_norm": 1.7156882396397672, "learning_rate": 1.0137661969604806e-07, "loss": 0.748, "step": 14650 }, { "epoch": 0.94, "grad_norm": 1.713827902588721, "learning_rate": 1.0116905251063625e-07, "loss": 0.6399, "step": 14651 }, { "epoch": 0.94, "grad_norm": 1.7734712513690372, "learning_rate": 1.009616958666837e-07, "loss": 0.6407, "step": 14652 }, { "epoch": 0.94, "grad_norm": 1.5739767486260878, "learning_rate": 1.0075454977310384e-07, "loss": 0.6613, "step": 14653 }, { "epoch": 0.94, "grad_norm": 1.6197178326897081, "learning_rate": 1.00547614238799e-07, "loss": 0.7103, "step": 14654 }, { "epoch": 0.94, "grad_norm": 1.8678108740607995, "learning_rate": 1.0034088927266206e-07, "loss": 0.6472, "step": 14655 }, { "epoch": 0.94, "grad_norm": 1.5610000104250263, "learning_rate": 1.0013437488357814e-07, "loss": 0.709, "step": 14656 }, { "epoch": 0.94, "grad_norm": 1.144836482575951, "learning_rate": 9.99280710804229e-08, "loss": 0.7334, "step": 14657 }, { "epoch": 0.94, "grad_norm": 1.6306640836005972, "learning_rate": 9.972197787206317e-08, "loss": 0.7771, "step": 14658 }, { "epoch": 0.94, "grad_norm": 1.7928644847471733, "learning_rate": 9.951609526735517e-08, "loss": 0.6969, "step": 14659 }, { "epoch": 0.94, "grad_norm": 1.6981324844776904, "learning_rate": 9.931042327514851e-08, "loss": 0.5775, "step": 14660 }, { "epoch": 0.94, "grad_norm": 1.284877569848252, "learning_rate": 9.910496190428164e-08, "loss": 0.5566, "step": 14661 }, { "epoch": 0.94, "grad_norm": 1.7425030744160581, "learning_rate": 9.889971116358532e-08, "loss": 0.7302, "step": 14662 }, { "epoch": 0.94, "grad_norm": 1.4762090363770937, "learning_rate": 9.869467106188135e-08, "loss": 0.6124, "step": 14663 }, { "epoch": 0.94, "grad_norm": 1.5003095271496902, "learning_rate": 9.848984160798103e-08, "loss": 0.638, "step": 14664 }, { "epoch": 0.94, "grad_norm": 2.0230232640664325, "learning_rate": 9.828522281068787e-08, "loss": 0.6442, "step": 14665 }, { "epoch": 0.94, "grad_norm": 1.7866642652166898, "learning_rate": 9.808081467879593e-08, "loss": 0.8077, "step": 14666 }, { "epoch": 0.94, "grad_norm": 1.4710296510787848, "learning_rate": 9.787661722108988e-08, "loss": 0.6333, "step": 14667 }, { "epoch": 0.94, "grad_norm": 1.4388056213124003, "learning_rate": 9.767263044634601e-08, "loss": 0.6097, "step": 14668 }, { "epoch": 0.94, "grad_norm": 1.6662829942910933, "learning_rate": 9.74688543633323e-08, "loss": 0.655, "step": 14669 }, { "epoch": 0.94, "grad_norm": 1.6074257790210746, "learning_rate": 9.726528898080456e-08, "loss": 0.6587, "step": 14670 }, { "epoch": 0.94, "grad_norm": 1.466782170726274, "learning_rate": 9.706193430751298e-08, "loss": 0.6759, "step": 14671 }, { "epoch": 0.94, "grad_norm": 1.5786095906181916, "learning_rate": 9.68587903521967e-08, "loss": 0.6204, "step": 14672 }, { "epoch": 0.94, "grad_norm": 1.5562386894516862, "learning_rate": 9.665585712358704e-08, "loss": 0.6181, "step": 14673 }, { "epoch": 0.94, "grad_norm": 1.962344520232655, "learning_rate": 9.64531346304054e-08, "loss": 0.6483, "step": 14674 }, { "epoch": 0.94, "grad_norm": 1.5967176710567255, "learning_rate": 9.625062288136367e-08, "loss": 0.6187, "step": 14675 }, { "epoch": 0.94, "grad_norm": 1.155648199874988, "learning_rate": 9.604832188516711e-08, "loss": 0.6235, "step": 14676 }, { "epoch": 0.94, "grad_norm": 1.630576184708068, "learning_rate": 9.584623165050766e-08, "loss": 0.7008, "step": 14677 }, { "epoch": 0.94, "grad_norm": 1.6114728827752873, "learning_rate": 9.564435218607338e-08, "loss": 0.6033, "step": 14678 }, { "epoch": 0.94, "grad_norm": 1.5382758938637777, "learning_rate": 9.544268350053843e-08, "loss": 0.6876, "step": 14679 }, { "epoch": 0.94, "grad_norm": 1.585385354217567, "learning_rate": 9.524122560257142e-08, "loss": 0.6659, "step": 14680 }, { "epoch": 0.94, "grad_norm": 1.3210652179309932, "learning_rate": 9.50399785008299e-08, "loss": 0.7096, "step": 14681 }, { "epoch": 0.94, "grad_norm": 1.924693262103022, "learning_rate": 9.483894220396361e-08, "loss": 0.7966, "step": 14682 }, { "epoch": 0.94, "grad_norm": 1.0668775095780547, "learning_rate": 9.463811672061284e-08, "loss": 0.5616, "step": 14683 }, { "epoch": 0.94, "grad_norm": 1.4647036526094654, "learning_rate": 9.443750205940738e-08, "loss": 0.6363, "step": 14684 }, { "epoch": 0.94, "grad_norm": 1.8869739255986173, "learning_rate": 9.423709822897087e-08, "loss": 0.6102, "step": 14685 }, { "epoch": 0.94, "grad_norm": 1.6686943301155248, "learning_rate": 9.403690523791476e-08, "loss": 0.7498, "step": 14686 }, { "epoch": 0.94, "grad_norm": 1.6402940088827016, "learning_rate": 9.383692309484382e-08, "loss": 0.7228, "step": 14687 }, { "epoch": 0.94, "grad_norm": 1.0662799195357218, "learning_rate": 9.363715180835287e-08, "loss": 0.6813, "step": 14688 }, { "epoch": 0.94, "grad_norm": 1.6584561846365955, "learning_rate": 9.343759138702724e-08, "loss": 0.5641, "step": 14689 }, { "epoch": 0.94, "grad_norm": 1.4627577290945284, "learning_rate": 9.323824183944452e-08, "loss": 0.6063, "step": 14690 }, { "epoch": 0.94, "grad_norm": 1.5725039164190928, "learning_rate": 9.303910317417064e-08, "loss": 0.6469, "step": 14691 }, { "epoch": 0.94, "grad_norm": 1.3782837652673878, "learning_rate": 9.284017539976598e-08, "loss": 0.6638, "step": 14692 }, { "epoch": 0.94, "grad_norm": 1.4380850060292205, "learning_rate": 9.264145852477868e-08, "loss": 0.7043, "step": 14693 }, { "epoch": 0.94, "grad_norm": 1.1028682972548203, "learning_rate": 9.244295255774972e-08, "loss": 0.6775, "step": 14694 }, { "epoch": 0.94, "grad_norm": 1.5251658782606121, "learning_rate": 9.224465750721057e-08, "loss": 0.6686, "step": 14695 }, { "epoch": 0.94, "grad_norm": 1.7463417379976443, "learning_rate": 9.204657338168388e-08, "loss": 0.6509, "step": 14696 }, { "epoch": 0.94, "grad_norm": 1.043091171796729, "learning_rate": 9.184870018968173e-08, "loss": 0.6282, "step": 14697 }, { "epoch": 0.94, "grad_norm": 1.7056653120433223, "learning_rate": 9.165103793970897e-08, "loss": 0.7181, "step": 14698 }, { "epoch": 0.94, "grad_norm": 1.6266094157825164, "learning_rate": 9.14535866402616e-08, "loss": 0.5571, "step": 14699 }, { "epoch": 0.94, "grad_norm": 1.7156683133232622, "learning_rate": 9.125634629982394e-08, "loss": 0.6468, "step": 14700 }, { "epoch": 0.94, "grad_norm": 1.3773551993717421, "learning_rate": 9.10593169268742e-08, "loss": 0.7723, "step": 14701 }, { "epoch": 0.94, "grad_norm": 1.6252596648431685, "learning_rate": 9.086249852987949e-08, "loss": 0.7059, "step": 14702 }, { "epoch": 0.94, "grad_norm": 1.7517016786080277, "learning_rate": 9.066589111729973e-08, "loss": 0.574, "step": 14703 }, { "epoch": 0.94, "grad_norm": 1.7347378607687507, "learning_rate": 9.04694946975837e-08, "loss": 0.7097, "step": 14704 }, { "epoch": 0.94, "grad_norm": 1.638179748932761, "learning_rate": 9.027330927917244e-08, "loss": 0.6527, "step": 14705 }, { "epoch": 0.94, "grad_norm": 1.4234495686434177, "learning_rate": 9.007733487049808e-08, "loss": 0.6781, "step": 14706 }, { "epoch": 0.94, "grad_norm": 1.6445936174596487, "learning_rate": 8.988157147998222e-08, "loss": 0.6018, "step": 14707 }, { "epoch": 0.94, "grad_norm": 1.4424065895177367, "learning_rate": 8.968601911603869e-08, "loss": 0.5978, "step": 14708 }, { "epoch": 0.94, "grad_norm": 1.6688905970538337, "learning_rate": 8.949067778707188e-08, "loss": 0.688, "step": 14709 }, { "epoch": 0.94, "grad_norm": 1.762289967625783, "learning_rate": 8.929554750147784e-08, "loss": 0.6649, "step": 14710 }, { "epoch": 0.94, "grad_norm": 1.6847073928241774, "learning_rate": 8.91006282676421e-08, "loss": 0.6214, "step": 14711 }, { "epoch": 0.94, "grad_norm": 1.6777103414960748, "learning_rate": 8.890592009394239e-08, "loss": 0.5896, "step": 14712 }, { "epoch": 0.94, "grad_norm": 1.5812654459564732, "learning_rate": 8.871142298874647e-08, "loss": 0.6873, "step": 14713 }, { "epoch": 0.94, "grad_norm": 1.4567291659626171, "learning_rate": 8.851713696041375e-08, "loss": 0.6642, "step": 14714 }, { "epoch": 0.94, "grad_norm": 1.5718139999219571, "learning_rate": 8.832306201729368e-08, "loss": 0.5878, "step": 14715 }, { "epoch": 0.94, "grad_norm": 2.069185489014814, "learning_rate": 8.812919816772791e-08, "loss": 0.6003, "step": 14716 }, { "epoch": 0.94, "grad_norm": 1.6562208560901426, "learning_rate": 8.793554542004756e-08, "loss": 0.6051, "step": 14717 }, { "epoch": 0.94, "grad_norm": 1.6256692685479315, "learning_rate": 8.774210378257597e-08, "loss": 0.5939, "step": 14718 }, { "epoch": 0.94, "grad_norm": 1.6008492617390675, "learning_rate": 8.754887326362649e-08, "loss": 0.5967, "step": 14719 }, { "epoch": 0.94, "grad_norm": 1.5264876076587854, "learning_rate": 8.735585387150414e-08, "loss": 0.7198, "step": 14720 }, { "epoch": 0.94, "grad_norm": 1.3249619002536053, "learning_rate": 8.71630456145045e-08, "loss": 0.5919, "step": 14721 }, { "epoch": 0.94, "grad_norm": 1.7321390441900082, "learning_rate": 8.697044850091374e-08, "loss": 0.7194, "step": 14722 }, { "epoch": 0.94, "grad_norm": 1.4539577848846124, "learning_rate": 8.677806253900967e-08, "loss": 0.7071, "step": 14723 }, { "epoch": 0.94, "grad_norm": 1.2390063987826994, "learning_rate": 8.658588773705956e-08, "loss": 0.6134, "step": 14724 }, { "epoch": 0.94, "grad_norm": 1.2099601915710794, "learning_rate": 8.639392410332403e-08, "loss": 0.6924, "step": 14725 }, { "epoch": 0.94, "grad_norm": 1.681732858632366, "learning_rate": 8.62021716460526e-08, "loss": 0.6659, "step": 14726 }, { "epoch": 0.94, "grad_norm": 1.5365076241790825, "learning_rate": 8.601063037348644e-08, "loss": 0.6724, "step": 14727 }, { "epoch": 0.94, "grad_norm": 1.8846647625065394, "learning_rate": 8.581930029385788e-08, "loss": 0.7422, "step": 14728 }, { "epoch": 0.94, "grad_norm": 0.9400985646176834, "learning_rate": 8.562818141538976e-08, "loss": 0.5822, "step": 14729 }, { "epoch": 0.94, "grad_norm": 1.5728112231327114, "learning_rate": 8.543727374629607e-08, "loss": 0.6513, "step": 14730 }, { "epoch": 0.94, "grad_norm": 1.5611677892212241, "learning_rate": 8.52465772947808e-08, "loss": 0.6126, "step": 14731 }, { "epoch": 0.94, "grad_norm": 1.274961655946183, "learning_rate": 8.505609206904075e-08, "loss": 0.5495, "step": 14732 }, { "epoch": 0.94, "grad_norm": 1.0360141054750016, "learning_rate": 8.486581807726157e-08, "loss": 0.6229, "step": 14733 }, { "epoch": 0.94, "grad_norm": 1.5109687742332119, "learning_rate": 8.46757553276223e-08, "loss": 0.6652, "step": 14734 }, { "epoch": 0.94, "grad_norm": 1.8867100389424978, "learning_rate": 8.448590382829025e-08, "loss": 0.6747, "step": 14735 }, { "epoch": 0.94, "grad_norm": 3.035399580136911, "learning_rate": 8.429626358742504e-08, "loss": 0.6501, "step": 14736 }, { "epoch": 0.94, "grad_norm": 1.5375418321058627, "learning_rate": 8.410683461317682e-08, "loss": 0.6181, "step": 14737 }, { "epoch": 0.94, "grad_norm": 1.557049980617085, "learning_rate": 8.391761691368738e-08, "loss": 0.6715, "step": 14738 }, { "epoch": 0.94, "grad_norm": 2.091333293049928, "learning_rate": 8.372861049708859e-08, "loss": 0.7042, "step": 14739 }, { "epoch": 0.94, "grad_norm": 1.1220087274840433, "learning_rate": 8.353981537150335e-08, "loss": 0.6666, "step": 14740 }, { "epoch": 0.94, "grad_norm": 1.7917883910635544, "learning_rate": 8.335123154504688e-08, "loss": 0.6605, "step": 14741 }, { "epoch": 0.94, "grad_norm": 1.0758032471460088, "learning_rate": 8.316285902582211e-08, "loss": 0.5632, "step": 14742 }, { "epoch": 0.94, "grad_norm": 1.5515291231016037, "learning_rate": 8.297469782192702e-08, "loss": 0.6231, "step": 14743 }, { "epoch": 0.94, "grad_norm": 1.6350826854146439, "learning_rate": 8.278674794144625e-08, "loss": 0.7214, "step": 14744 }, { "epoch": 0.94, "grad_norm": 1.6603383738817106, "learning_rate": 8.25990093924589e-08, "loss": 0.658, "step": 14745 }, { "epoch": 0.94, "grad_norm": 1.8904063003818392, "learning_rate": 8.241148218303352e-08, "loss": 0.717, "step": 14746 }, { "epoch": 0.94, "grad_norm": 1.6543212035139878, "learning_rate": 8.222416632122864e-08, "loss": 0.6046, "step": 14747 }, { "epoch": 0.94, "grad_norm": 1.4058559844356768, "learning_rate": 8.203706181509674e-08, "loss": 0.5496, "step": 14748 }, { "epoch": 0.94, "grad_norm": 1.2293044255873076, "learning_rate": 8.185016867267693e-08, "loss": 0.6245, "step": 14749 }, { "epoch": 0.94, "grad_norm": 1.6292006381675521, "learning_rate": 8.166348690200276e-08, "loss": 0.7646, "step": 14750 }, { "epoch": 0.94, "grad_norm": 1.3245830170169874, "learning_rate": 8.147701651109674e-08, "loss": 0.7093, "step": 14751 }, { "epoch": 0.94, "grad_norm": 1.068451716296929, "learning_rate": 8.129075750797355e-08, "loss": 0.6029, "step": 14752 }, { "epoch": 0.94, "grad_norm": 1.5401232750557632, "learning_rate": 8.11047099006379e-08, "loss": 0.6628, "step": 14753 }, { "epoch": 0.94, "grad_norm": 1.0838453285692438, "learning_rate": 8.091887369708506e-08, "loss": 0.6304, "step": 14754 }, { "epoch": 0.94, "grad_norm": 1.608480892508038, "learning_rate": 8.073324890530421e-08, "loss": 0.6544, "step": 14755 }, { "epoch": 0.94, "grad_norm": 1.416227650988716, "learning_rate": 8.054783553327006e-08, "loss": 0.5836, "step": 14756 }, { "epoch": 0.94, "grad_norm": 1.787111217858542, "learning_rate": 8.036263358895402e-08, "loss": 0.6302, "step": 14757 }, { "epoch": 0.94, "grad_norm": 1.6237107266857174, "learning_rate": 8.017764308031306e-08, "loss": 0.6864, "step": 14758 }, { "epoch": 0.94, "grad_norm": 1.7536695883532132, "learning_rate": 7.999286401529971e-08, "loss": 0.7901, "step": 14759 }, { "epoch": 0.94, "grad_norm": 1.856813381247626, "learning_rate": 7.980829640185483e-08, "loss": 0.7267, "step": 14760 }, { "epoch": 0.94, "grad_norm": 1.556575543631828, "learning_rate": 7.962394024791043e-08, "loss": 0.7163, "step": 14761 }, { "epoch": 0.94, "grad_norm": 1.688806476955761, "learning_rate": 7.943979556139014e-08, "loss": 0.6786, "step": 14762 }, { "epoch": 0.94, "grad_norm": 1.478945344184406, "learning_rate": 7.925586235020766e-08, "loss": 0.6828, "step": 14763 }, { "epoch": 0.94, "grad_norm": 1.7351078200105297, "learning_rate": 7.907214062226886e-08, "loss": 0.7078, "step": 14764 }, { "epoch": 0.95, "grad_norm": 1.7241813860116473, "learning_rate": 7.888863038546801e-08, "loss": 0.7083, "step": 14765 }, { "epoch": 0.95, "grad_norm": 1.7160657228851215, "learning_rate": 7.870533164769379e-08, "loss": 0.656, "step": 14766 }, { "epoch": 0.95, "grad_norm": 1.7412329026137112, "learning_rate": 7.852224441682377e-08, "loss": 0.6813, "step": 14767 }, { "epoch": 0.95, "grad_norm": 1.5193596832769902, "learning_rate": 7.833936870072612e-08, "loss": 0.6602, "step": 14768 }, { "epoch": 0.95, "grad_norm": 1.6263836021710485, "learning_rate": 7.81567045072601e-08, "loss": 0.663, "step": 14769 }, { "epoch": 0.95, "grad_norm": 1.405112713625622, "learning_rate": 7.797425184427721e-08, "loss": 0.6654, "step": 14770 }, { "epoch": 0.95, "grad_norm": 1.2014588240743178, "learning_rate": 7.779201071961784e-08, "loss": 0.6494, "step": 14771 }, { "epoch": 0.95, "grad_norm": 1.098454622840851, "learning_rate": 7.760998114111462e-08, "loss": 0.5915, "step": 14772 }, { "epoch": 0.95, "grad_norm": 1.5351821659973846, "learning_rate": 7.742816311659185e-08, "loss": 0.655, "step": 14773 }, { "epoch": 0.95, "grad_norm": 1.5652479750572228, "learning_rate": 7.724655665386271e-08, "loss": 0.7491, "step": 14774 }, { "epoch": 0.95, "grad_norm": 1.5349400293450186, "learning_rate": 7.706516176073209e-08, "loss": 0.6817, "step": 14775 }, { "epoch": 0.95, "grad_norm": 1.5824266695209983, "learning_rate": 7.688397844499652e-08, "loss": 0.6714, "step": 14776 }, { "epoch": 0.95, "grad_norm": 1.1894306705117537, "learning_rate": 7.670300671444309e-08, "loss": 0.6877, "step": 14777 }, { "epoch": 0.95, "grad_norm": 1.674131512096588, "learning_rate": 7.652224657684837e-08, "loss": 0.6496, "step": 14778 }, { "epoch": 0.95, "grad_norm": 1.6326095417933615, "learning_rate": 7.634169803998226e-08, "loss": 0.7489, "step": 14779 }, { "epoch": 0.95, "grad_norm": 1.390143624531933, "learning_rate": 7.616136111160411e-08, "loss": 0.6566, "step": 14780 }, { "epoch": 0.95, "grad_norm": 1.6822461584147104, "learning_rate": 7.598123579946382e-08, "loss": 0.6268, "step": 14781 }, { "epoch": 0.95, "grad_norm": 1.4961509263426502, "learning_rate": 7.580132211130354e-08, "loss": 0.628, "step": 14782 }, { "epoch": 0.95, "grad_norm": 1.425550852439918, "learning_rate": 7.562162005485484e-08, "loss": 0.6684, "step": 14783 }, { "epoch": 0.95, "grad_norm": 1.4160054620361922, "learning_rate": 7.544212963784159e-08, "loss": 0.6773, "step": 14784 }, { "epoch": 0.95, "grad_norm": 1.5405702211065249, "learning_rate": 7.526285086797813e-08, "loss": 0.6664, "step": 14785 }, { "epoch": 0.95, "grad_norm": 1.523078383369583, "learning_rate": 7.508378375296887e-08, "loss": 0.6274, "step": 14786 }, { "epoch": 0.95, "grad_norm": 1.6453723201422978, "learning_rate": 7.490492830050933e-08, "loss": 0.6678, "step": 14787 }, { "epoch": 0.95, "grad_norm": 1.519192429304574, "learning_rate": 7.472628451828779e-08, "loss": 0.663, "step": 14788 }, { "epoch": 0.95, "grad_norm": 1.0550121029029065, "learning_rate": 7.454785241398033e-08, "loss": 0.6844, "step": 14789 }, { "epoch": 0.95, "grad_norm": 1.0393545850616128, "learning_rate": 7.436963199525693e-08, "loss": 0.5451, "step": 14790 }, { "epoch": 0.95, "grad_norm": 1.599053787312485, "learning_rate": 7.419162326977592e-08, "loss": 0.7259, "step": 14791 }, { "epoch": 0.95, "grad_norm": 3.4743151604822935, "learning_rate": 7.401382624518894e-08, "loss": 0.6455, "step": 14792 }, { "epoch": 0.95, "grad_norm": 1.8271994099489226, "learning_rate": 7.383624092913655e-08, "loss": 0.703, "step": 14793 }, { "epoch": 0.95, "grad_norm": 2.806726273961948, "learning_rate": 7.365886732925153e-08, "loss": 0.5999, "step": 14794 }, { "epoch": 0.95, "grad_norm": 1.7279591442730131, "learning_rate": 7.348170545315614e-08, "loss": 0.7167, "step": 14795 }, { "epoch": 0.95, "grad_norm": 2.55971390929034, "learning_rate": 7.330475530846537e-08, "loss": 0.6582, "step": 14796 }, { "epoch": 0.95, "grad_norm": 1.5773573230212616, "learning_rate": 7.312801690278426e-08, "loss": 0.6262, "step": 14797 }, { "epoch": 0.95, "grad_norm": 1.24271250212963, "learning_rate": 7.29514902437073e-08, "loss": 0.5725, "step": 14798 }, { "epoch": 0.95, "grad_norm": 1.7433842931803265, "learning_rate": 7.277517533882283e-08, "loss": 0.7172, "step": 14799 }, { "epoch": 0.95, "grad_norm": 1.5444342717799748, "learning_rate": 7.259907219570761e-08, "loss": 0.7273, "step": 14800 }, { "epoch": 0.95, "grad_norm": 1.6974852471962336, "learning_rate": 7.242318082193e-08, "loss": 0.6451, "step": 14801 }, { "epoch": 0.95, "grad_norm": 1.4690081852516388, "learning_rate": 7.224750122505009e-08, "loss": 0.6734, "step": 14802 }, { "epoch": 0.95, "grad_norm": 1.5625037484060411, "learning_rate": 7.207203341261792e-08, "loss": 0.5586, "step": 14803 }, { "epoch": 0.95, "grad_norm": 1.7292115722008266, "learning_rate": 7.189677739217526e-08, "loss": 0.6185, "step": 14804 }, { "epoch": 0.95, "grad_norm": 1.484244249393604, "learning_rate": 7.172173317125275e-08, "loss": 0.6898, "step": 14805 }, { "epoch": 0.95, "grad_norm": 1.5548183428621245, "learning_rate": 7.154690075737547e-08, "loss": 0.6972, "step": 14806 }, { "epoch": 0.95, "grad_norm": 1.6915797105812076, "learning_rate": 7.137228015805519e-08, "loss": 0.6569, "step": 14807 }, { "epoch": 0.95, "grad_norm": 1.7578628118158393, "learning_rate": 7.11978713807987e-08, "loss": 0.6373, "step": 14808 }, { "epoch": 0.95, "grad_norm": 1.6424871428865577, "learning_rate": 7.102367443310054e-08, "loss": 0.625, "step": 14809 }, { "epoch": 0.95, "grad_norm": 1.8292526501844109, "learning_rate": 7.084968932244751e-08, "loss": 0.6926, "step": 14810 }, { "epoch": 0.95, "grad_norm": 1.7961678650934563, "learning_rate": 7.067591605631752e-08, "loss": 0.6439, "step": 14811 }, { "epoch": 0.95, "grad_norm": 1.5175968847008456, "learning_rate": 7.050235464217847e-08, "loss": 0.6979, "step": 14812 }, { "epoch": 0.95, "grad_norm": 1.4742785652231396, "learning_rate": 7.032900508749052e-08, "loss": 0.6478, "step": 14813 }, { "epoch": 0.95, "grad_norm": 1.422788901460827, "learning_rate": 7.01558673997027e-08, "loss": 0.6869, "step": 14814 }, { "epoch": 0.95, "grad_norm": 1.490662153848861, "learning_rate": 6.998294158625684e-08, "loss": 0.7159, "step": 14815 }, { "epoch": 0.95, "grad_norm": 1.5438804663150267, "learning_rate": 6.981022765458423e-08, "loss": 0.6332, "step": 14816 }, { "epoch": 0.95, "grad_norm": 1.8702666173738862, "learning_rate": 6.963772561210891e-08, "loss": 0.7067, "step": 14817 }, { "epoch": 0.95, "grad_norm": 1.4521841992146585, "learning_rate": 6.946543546624384e-08, "loss": 0.6891, "step": 14818 }, { "epoch": 0.95, "grad_norm": 1.5579856379549966, "learning_rate": 6.929335722439367e-08, "loss": 0.6388, "step": 14819 }, { "epoch": 0.95, "grad_norm": 1.689898595318142, "learning_rate": 6.91214908939547e-08, "loss": 0.6099, "step": 14820 }, { "epoch": 0.95, "grad_norm": 1.1318719614900115, "learning_rate": 6.894983648231213e-08, "loss": 0.6237, "step": 14821 }, { "epoch": 0.95, "grad_norm": 1.2701761223743966, "learning_rate": 6.877839399684505e-08, "loss": 0.6345, "step": 14822 }, { "epoch": 0.95, "grad_norm": 1.4613554082352582, "learning_rate": 6.86071634449198e-08, "loss": 0.6496, "step": 14823 }, { "epoch": 0.95, "grad_norm": 1.4153031222104133, "learning_rate": 6.84361448338966e-08, "loss": 0.6558, "step": 14824 }, { "epoch": 0.95, "grad_norm": 1.6589745772318978, "learning_rate": 6.826533817112513e-08, "loss": 0.6915, "step": 14825 }, { "epoch": 0.95, "grad_norm": 1.6930768300096983, "learning_rate": 6.809474346394673e-08, "loss": 0.653, "step": 14826 }, { "epoch": 0.95, "grad_norm": 1.5876145088853975, "learning_rate": 6.792436071969277e-08, "loss": 0.6678, "step": 14827 }, { "epoch": 0.95, "grad_norm": 2.6226989748035976, "learning_rate": 6.775418994568572e-08, "loss": 0.6514, "step": 14828 }, { "epoch": 0.95, "grad_norm": 1.7699667784763464, "learning_rate": 6.758423114924029e-08, "loss": 0.6939, "step": 14829 }, { "epoch": 0.95, "grad_norm": 1.682791450452236, "learning_rate": 6.741448433765951e-08, "loss": 0.6506, "step": 14830 }, { "epoch": 0.95, "grad_norm": 1.4803126492224867, "learning_rate": 6.724494951823979e-08, "loss": 0.6485, "step": 14831 }, { "epoch": 0.95, "grad_norm": 1.7214523485116506, "learning_rate": 6.707562669826695e-08, "loss": 0.6116, "step": 14832 }, { "epoch": 0.95, "grad_norm": 1.5741360263591293, "learning_rate": 6.690651588501795e-08, "loss": 0.6933, "step": 14833 }, { "epoch": 0.95, "grad_norm": 1.2667879991385713, "learning_rate": 6.673761708576088e-08, "loss": 0.6549, "step": 14834 }, { "epoch": 0.95, "grad_norm": 1.332847483863145, "learning_rate": 6.656893030775546e-08, "loss": 0.7089, "step": 14835 }, { "epoch": 0.95, "grad_norm": 1.909863358160812, "learning_rate": 6.640045555825036e-08, "loss": 0.7561, "step": 14836 }, { "epoch": 0.95, "grad_norm": 1.5224208226361875, "learning_rate": 6.623219284448645e-08, "loss": 0.6437, "step": 14837 }, { "epoch": 0.95, "grad_norm": 1.5385185188290869, "learning_rate": 6.606414217369628e-08, "loss": 0.7178, "step": 14838 }, { "epoch": 0.95, "grad_norm": 1.4540531543827386, "learning_rate": 6.589630355310128e-08, "loss": 0.6399, "step": 14839 }, { "epoch": 0.95, "grad_norm": 1.159175240196333, "learning_rate": 6.572867698991515e-08, "loss": 0.6862, "step": 14840 }, { "epoch": 0.95, "grad_norm": 1.5538379887575644, "learning_rate": 6.556126249134209e-08, "loss": 0.6999, "step": 14841 }, { "epoch": 0.95, "grad_norm": 1.7303666645075422, "learning_rate": 6.539406006457749e-08, "loss": 0.5293, "step": 14842 }, { "epoch": 0.95, "grad_norm": 1.5571301231865302, "learning_rate": 6.522706971680726e-08, "loss": 0.6492, "step": 14843 }, { "epoch": 0.95, "grad_norm": 3.157844139355378, "learning_rate": 6.506029145520842e-08, "loss": 0.578, "step": 14844 }, { "epoch": 0.95, "grad_norm": 1.3808199597441357, "learning_rate": 6.489372528694748e-08, "loss": 0.554, "step": 14845 }, { "epoch": 0.95, "grad_norm": 1.6067600410487959, "learning_rate": 6.472737121918483e-08, "loss": 0.7, "step": 14846 }, { "epoch": 0.95, "grad_norm": 1.468514275810725, "learning_rate": 6.456122925906971e-08, "loss": 0.6879, "step": 14847 }, { "epoch": 0.95, "grad_norm": 1.224657768142011, "learning_rate": 6.439529941374145e-08, "loss": 0.5451, "step": 14848 }, { "epoch": 0.95, "grad_norm": 1.2235613537715024, "learning_rate": 6.422958169033266e-08, "loss": 0.5268, "step": 14849 }, { "epoch": 0.95, "grad_norm": 1.6310185679812539, "learning_rate": 6.406407609596488e-08, "loss": 0.7518, "step": 14850 }, { "epoch": 0.95, "grad_norm": 1.1942548358229779, "learning_rate": 6.389878263775129e-08, "loss": 0.6393, "step": 14851 }, { "epoch": 0.95, "grad_norm": 1.4509484561241306, "learning_rate": 6.373370132279566e-08, "loss": 0.6282, "step": 14852 }, { "epoch": 0.95, "grad_norm": 1.4073347521303783, "learning_rate": 6.356883215819287e-08, "loss": 0.6162, "step": 14853 }, { "epoch": 0.95, "grad_norm": 1.6392319439911867, "learning_rate": 6.340417515102893e-08, "loss": 0.6673, "step": 14854 }, { "epoch": 0.95, "grad_norm": 1.4046348850173547, "learning_rate": 6.323973030838037e-08, "loss": 0.572, "step": 14855 }, { "epoch": 0.95, "grad_norm": 1.4318286592362848, "learning_rate": 6.307549763731436e-08, "loss": 0.6932, "step": 14856 }, { "epoch": 0.95, "grad_norm": 1.6255801852437042, "learning_rate": 6.291147714488965e-08, "loss": 0.7116, "step": 14857 }, { "epoch": 0.95, "grad_norm": 1.5417276694881044, "learning_rate": 6.274766883815565e-08, "loss": 0.7694, "step": 14858 }, { "epoch": 0.95, "grad_norm": 1.9934437238999332, "learning_rate": 6.258407272415223e-08, "loss": 0.7129, "step": 14859 }, { "epoch": 0.95, "grad_norm": 1.5115747357822193, "learning_rate": 6.242068880991048e-08, "loss": 0.5738, "step": 14860 }, { "epoch": 0.95, "grad_norm": 1.7397676564671714, "learning_rate": 6.225751710245198e-08, "loss": 0.7054, "step": 14861 }, { "epoch": 0.95, "grad_norm": 2.953230136101961, "learning_rate": 6.209455760879002e-08, "loss": 0.5902, "step": 14862 }, { "epoch": 0.95, "grad_norm": 1.7012169013736407, "learning_rate": 6.193181033592788e-08, "loss": 0.5638, "step": 14863 }, { "epoch": 0.95, "grad_norm": 1.5922908089548282, "learning_rate": 6.176927529086052e-08, "loss": 0.6906, "step": 14864 }, { "epoch": 0.95, "grad_norm": 1.6733162038932758, "learning_rate": 6.160695248057236e-08, "loss": 0.693, "step": 14865 }, { "epoch": 0.95, "grad_norm": 1.6156720827009765, "learning_rate": 6.144484191204115e-08, "loss": 0.5571, "step": 14866 }, { "epoch": 0.95, "grad_norm": 1.6101827951423913, "learning_rate": 6.128294359223297e-08, "loss": 0.7283, "step": 14867 }, { "epoch": 0.95, "grad_norm": 1.603962447666268, "learning_rate": 6.11212575281056e-08, "loss": 0.6648, "step": 14868 }, { "epoch": 0.95, "grad_norm": 1.2496386619565145, "learning_rate": 6.09597837266096e-08, "loss": 0.6153, "step": 14869 }, { "epoch": 0.95, "grad_norm": 1.791079208443586, "learning_rate": 6.07985221946833e-08, "loss": 0.6547, "step": 14870 }, { "epoch": 0.95, "grad_norm": 1.7452206234641638, "learning_rate": 6.063747293925781e-08, "loss": 0.6061, "step": 14871 }, { "epoch": 0.95, "grad_norm": 1.618912866746607, "learning_rate": 6.047663596725428e-08, "loss": 0.6138, "step": 14872 }, { "epoch": 0.95, "grad_norm": 1.7071851001142784, "learning_rate": 6.031601128558606e-08, "loss": 0.6354, "step": 14873 }, { "epoch": 0.95, "grad_norm": 1.6280266750380243, "learning_rate": 6.015559890115597e-08, "loss": 0.7123, "step": 14874 }, { "epoch": 0.95, "grad_norm": 2.3014798348253884, "learning_rate": 5.999539882085793e-08, "loss": 0.7115, "step": 14875 }, { "epoch": 0.95, "grad_norm": 1.6344153092551743, "learning_rate": 5.983541105157809e-08, "loss": 0.5589, "step": 14876 }, { "epoch": 0.95, "grad_norm": 1.2944464399880533, "learning_rate": 5.96756356001904e-08, "loss": 0.5416, "step": 14877 }, { "epoch": 0.95, "grad_norm": 1.5004003687885492, "learning_rate": 5.951607247356384e-08, "loss": 0.7203, "step": 14878 }, { "epoch": 0.95, "grad_norm": 1.5121930843925078, "learning_rate": 5.9356721678554554e-08, "loss": 0.6386, "step": 14879 }, { "epoch": 0.95, "grad_norm": 1.7410462629188352, "learning_rate": 5.9197583222011525e-08, "loss": 0.7004, "step": 14880 }, { "epoch": 0.95, "grad_norm": 1.812463336087077, "learning_rate": 5.903865711077483e-08, "loss": 0.6969, "step": 14881 }, { "epoch": 0.95, "grad_norm": 1.6062559364534075, "learning_rate": 5.887994335167346e-08, "loss": 0.5812, "step": 14882 }, { "epoch": 0.95, "grad_norm": 1.5587450948995394, "learning_rate": 5.872144195153029e-08, "loss": 0.6598, "step": 14883 }, { "epoch": 0.95, "grad_norm": 1.4889939841099111, "learning_rate": 5.8563152917155975e-08, "loss": 0.6255, "step": 14884 }, { "epoch": 0.95, "grad_norm": 1.572038115373691, "learning_rate": 5.840507625535397e-08, "loss": 0.6213, "step": 14885 }, { "epoch": 0.95, "grad_norm": 1.588539809545351, "learning_rate": 5.824721197291827e-08, "loss": 0.6905, "step": 14886 }, { "epoch": 0.95, "grad_norm": 1.6503919799848084, "learning_rate": 5.808956007663291e-08, "loss": 0.7114, "step": 14887 }, { "epoch": 0.95, "grad_norm": 1.5233018649245016, "learning_rate": 5.793212057327469e-08, "loss": 0.6913, "step": 14888 }, { "epoch": 0.95, "grad_norm": 1.442421049391195, "learning_rate": 5.777489346960874e-08, "loss": 0.6452, "step": 14889 }, { "epoch": 0.95, "grad_norm": 1.3035799956971876, "learning_rate": 5.7617878772392445e-08, "loss": 0.7059, "step": 14890 }, { "epoch": 0.95, "grad_norm": 1.6236372397535823, "learning_rate": 5.7461076488374844e-08, "loss": 0.5972, "step": 14891 }, { "epoch": 0.95, "grad_norm": 1.3960791508117232, "learning_rate": 5.730448662429444e-08, "loss": 0.5963, "step": 14892 }, { "epoch": 0.95, "grad_norm": 1.8233354548384149, "learning_rate": 5.7148109186880854e-08, "loss": 0.6812, "step": 14893 }, { "epoch": 0.95, "grad_norm": 1.5968687393149619, "learning_rate": 5.699194418285592e-08, "loss": 0.6382, "step": 14894 }, { "epoch": 0.95, "grad_norm": 1.5480320816520747, "learning_rate": 5.683599161892928e-08, "loss": 0.7073, "step": 14895 }, { "epoch": 0.95, "grad_norm": 1.1853813366225605, "learning_rate": 5.6680251501805564e-08, "loss": 0.643, "step": 14896 }, { "epoch": 0.95, "grad_norm": 1.4678124100939007, "learning_rate": 5.65247238381772e-08, "loss": 0.6068, "step": 14897 }, { "epoch": 0.95, "grad_norm": 2.0903647320115035, "learning_rate": 5.636940863472884e-08, "loss": 0.6017, "step": 14898 }, { "epoch": 0.95, "grad_norm": 1.4820976529016088, "learning_rate": 5.621430589813459e-08, "loss": 0.6583, "step": 14899 }, { "epoch": 0.95, "grad_norm": 1.5466022447473031, "learning_rate": 5.6059415635061896e-08, "loss": 0.6556, "step": 14900 }, { "epoch": 0.95, "grad_norm": 1.3845791900041982, "learning_rate": 5.5904737852166545e-08, "loss": 0.5772, "step": 14901 }, { "epoch": 0.95, "grad_norm": 2.084224342612809, "learning_rate": 5.5750272556095996e-08, "loss": 0.6727, "step": 14902 }, { "epoch": 0.95, "grad_norm": 1.2970405773893905, "learning_rate": 5.559601975348994e-08, "loss": 0.6351, "step": 14903 }, { "epoch": 0.95, "grad_norm": 1.5906505441409486, "learning_rate": 5.544197945097751e-08, "loss": 0.6233, "step": 14904 }, { "epoch": 0.95, "grad_norm": 1.0778733589289233, "learning_rate": 5.5288151655178427e-08, "loss": 0.6497, "step": 14905 }, { "epoch": 0.95, "grad_norm": 1.6066262541570273, "learning_rate": 5.51345363727046e-08, "loss": 0.6513, "step": 14906 }, { "epoch": 0.95, "grad_norm": 1.4611162865547467, "learning_rate": 5.4981133610158e-08, "loss": 0.6219, "step": 14907 }, { "epoch": 0.95, "grad_norm": 1.3906818261043543, "learning_rate": 5.482794337413111e-08, "loss": 0.7476, "step": 14908 }, { "epoch": 0.95, "grad_norm": 1.6497017223738148, "learning_rate": 5.4674965671208115e-08, "loss": 0.6883, "step": 14909 }, { "epoch": 0.95, "grad_norm": 1.5411057941010826, "learning_rate": 5.45222005079632e-08, "loss": 0.6308, "step": 14910 }, { "epoch": 0.95, "grad_norm": 1.561773032533706, "learning_rate": 5.436964789096222e-08, "loss": 0.6687, "step": 14911 }, { "epoch": 0.95, "grad_norm": 1.8681797709032817, "learning_rate": 5.4217307826762155e-08, "loss": 0.6281, "step": 14912 }, { "epoch": 0.95, "grad_norm": 1.3321743556484285, "learning_rate": 5.406518032190944e-08, "loss": 0.626, "step": 14913 }, { "epoch": 0.95, "grad_norm": 1.5801632446613827, "learning_rate": 5.391326538294217e-08, "loss": 0.5623, "step": 14914 }, { "epoch": 0.95, "grad_norm": 1.3041999122066639, "learning_rate": 5.3761563016389576e-08, "loss": 0.7923, "step": 14915 }, { "epoch": 0.95, "grad_norm": 1.8246012148336204, "learning_rate": 5.361007322877199e-08, "loss": 0.6287, "step": 14916 }, { "epoch": 0.95, "grad_norm": 1.5210408601305627, "learning_rate": 5.34587960265992e-08, "loss": 0.6665, "step": 14917 }, { "epoch": 0.95, "grad_norm": 1.5993861881881555, "learning_rate": 5.330773141637324e-08, "loss": 0.5698, "step": 14918 }, { "epoch": 0.95, "grad_norm": 1.6535046449763633, "learning_rate": 5.315687940458669e-08, "loss": 0.6981, "step": 14919 }, { "epoch": 0.95, "grad_norm": 1.7264540237011967, "learning_rate": 5.3006239997722694e-08, "loss": 0.6185, "step": 14920 }, { "epoch": 0.96, "grad_norm": 1.5363690327753985, "learning_rate": 5.285581320225552e-08, "loss": 0.5672, "step": 14921 }, { "epoch": 0.96, "grad_norm": 1.6588096685809923, "learning_rate": 5.270559902465e-08, "loss": 0.7324, "step": 14922 }, { "epoch": 0.96, "grad_norm": 1.592400333552993, "learning_rate": 5.255559747136263e-08, "loss": 0.6677, "step": 14923 }, { "epoch": 0.96, "grad_norm": 1.6412218151243672, "learning_rate": 5.240580854883881e-08, "loss": 0.7912, "step": 14924 }, { "epoch": 0.96, "grad_norm": 1.374288247221087, "learning_rate": 5.2256232263517835e-08, "loss": 0.6768, "step": 14925 }, { "epoch": 0.96, "grad_norm": 1.512880238051852, "learning_rate": 5.210686862182679e-08, "loss": 0.6065, "step": 14926 }, { "epoch": 0.96, "grad_norm": 1.6832782274484168, "learning_rate": 5.195771763018609e-08, "loss": 0.6949, "step": 14927 }, { "epoch": 0.96, "grad_norm": 1.8134242750919347, "learning_rate": 5.18087792950045e-08, "loss": 0.6843, "step": 14928 }, { "epoch": 0.96, "grad_norm": 1.620692141331549, "learning_rate": 5.166005362268467e-08, "loss": 0.7218, "step": 14929 }, { "epoch": 0.96, "grad_norm": 1.8116628170879352, "learning_rate": 5.151154061961761e-08, "loss": 0.6273, "step": 14930 }, { "epoch": 0.96, "grad_norm": 1.140299653557281, "learning_rate": 5.1363240292186535e-08, "loss": 0.6569, "step": 14931 }, { "epoch": 0.96, "grad_norm": 1.7859798921972876, "learning_rate": 5.121515264676524e-08, "loss": 0.7056, "step": 14932 }, { "epoch": 0.96, "grad_norm": 1.5451309114906366, "learning_rate": 5.1067277689716974e-08, "loss": 0.719, "step": 14933 }, { "epoch": 0.96, "grad_norm": 1.8263609179171447, "learning_rate": 5.091961542739887e-08, "loss": 0.6574, "step": 14934 }, { "epoch": 0.96, "grad_norm": 1.772413752683275, "learning_rate": 5.07721658661553e-08, "loss": 0.69, "step": 14935 }, { "epoch": 0.96, "grad_norm": 1.4881316478628257, "learning_rate": 5.0624929012325076e-08, "loss": 0.647, "step": 14936 }, { "epoch": 0.96, "grad_norm": 1.612699051905796, "learning_rate": 5.0477904872234804e-08, "loss": 0.7441, "step": 14937 }, { "epoch": 0.96, "grad_norm": 1.7209947471150044, "learning_rate": 5.033109345220388e-08, "loss": 0.5681, "step": 14938 }, { "epoch": 0.96, "grad_norm": 1.6570005014452123, "learning_rate": 5.018449475854226e-08, "loss": 0.6242, "step": 14939 }, { "epoch": 0.96, "grad_norm": 1.235347304484116, "learning_rate": 5.003810879754933e-08, "loss": 0.656, "step": 14940 }, { "epoch": 0.96, "grad_norm": 1.2146342575947306, "learning_rate": 4.9891935575517856e-08, "loss": 0.6131, "step": 14941 }, { "epoch": 0.96, "grad_norm": 4.1985969960978355, "learning_rate": 4.974597509872892e-08, "loss": 0.5996, "step": 14942 }, { "epoch": 0.96, "grad_norm": 1.4488505612376699, "learning_rate": 4.9600227373456936e-08, "loss": 0.5986, "step": 14943 }, { "epoch": 0.96, "grad_norm": 1.4308362465083515, "learning_rate": 4.945469240596412e-08, "loss": 0.6415, "step": 14944 }, { "epoch": 0.96, "grad_norm": 1.4513126051170693, "learning_rate": 4.930937020250604e-08, "loss": 0.6866, "step": 14945 }, { "epoch": 0.96, "grad_norm": 3.638866027923355, "learning_rate": 4.9164260769328785e-08, "loss": 0.6466, "step": 14946 }, { "epoch": 0.96, "grad_norm": 2.023899000858885, "learning_rate": 4.90193641126685e-08, "loss": 0.6566, "step": 14947 }, { "epoch": 0.96, "grad_norm": 1.582406934663005, "learning_rate": 4.887468023875241e-08, "loss": 0.6248, "step": 14948 }, { "epoch": 0.96, "grad_norm": 1.5157746643281276, "learning_rate": 4.873020915379834e-08, "loss": 0.6432, "step": 14949 }, { "epoch": 0.96, "grad_norm": 2.1643365821771634, "learning_rate": 4.858595086401685e-08, "loss": 0.7188, "step": 14950 }, { "epoch": 0.96, "grad_norm": 1.6059859111906962, "learning_rate": 4.844190537560578e-08, "loss": 0.6577, "step": 14951 }, { "epoch": 0.96, "grad_norm": 1.5052505581004307, "learning_rate": 4.829807269475739e-08, "loss": 0.565, "step": 14952 }, { "epoch": 0.96, "grad_norm": 1.1156905440304115, "learning_rate": 4.8154452827652854e-08, "loss": 0.6462, "step": 14953 }, { "epoch": 0.96, "grad_norm": 1.6311045501593113, "learning_rate": 4.8011045780465e-08, "loss": 0.631, "step": 14954 }, { "epoch": 0.96, "grad_norm": 1.423908301520977, "learning_rate": 4.7867851559356694e-08, "loss": 0.5567, "step": 14955 }, { "epoch": 0.96, "grad_norm": 1.443487001926417, "learning_rate": 4.772487017048189e-08, "loss": 0.6785, "step": 14956 }, { "epoch": 0.96, "grad_norm": 1.5176170376521299, "learning_rate": 4.758210161998622e-08, "loss": 0.694, "step": 14957 }, { "epoch": 0.96, "grad_norm": 1.5630191289072686, "learning_rate": 4.7439545914005345e-08, "loss": 0.9334, "step": 14958 }, { "epoch": 0.96, "grad_norm": 1.4797125582299384, "learning_rate": 4.729720305866603e-08, "loss": 0.6692, "step": 14959 }, { "epoch": 0.96, "grad_norm": 1.484964375853846, "learning_rate": 4.7155073060086156e-08, "loss": 0.6416, "step": 14960 }, { "epoch": 0.96, "grad_norm": 1.7271718025697103, "learning_rate": 4.701315592437361e-08, "loss": 0.6629, "step": 14961 }, { "epoch": 0.96, "grad_norm": 1.5993987179917997, "learning_rate": 4.687145165762797e-08, "loss": 0.6749, "step": 14962 }, { "epoch": 0.96, "grad_norm": 2.8228111163647704, "learning_rate": 4.6729960265939344e-08, "loss": 0.6684, "step": 14963 }, { "epoch": 0.96, "grad_norm": 1.6903725267315632, "learning_rate": 4.6588681755388445e-08, "loss": 0.5845, "step": 14964 }, { "epoch": 0.96, "grad_norm": 1.4091029362270389, "learning_rate": 4.644761613204818e-08, "loss": 0.6369, "step": 14965 }, { "epoch": 0.96, "grad_norm": 1.7130093640083104, "learning_rate": 4.630676340198037e-08, "loss": 0.7528, "step": 14966 }, { "epoch": 0.96, "grad_norm": 1.3886535180225834, "learning_rate": 4.6166123571237955e-08, "loss": 0.7564, "step": 14967 }, { "epoch": 0.96, "grad_norm": 1.4318759895412652, "learning_rate": 4.6025696645866646e-08, "loss": 0.6156, "step": 14968 }, { "epoch": 0.96, "grad_norm": 1.8056165887184152, "learning_rate": 4.588548263190107e-08, "loss": 0.6636, "step": 14969 }, { "epoch": 0.96, "grad_norm": 1.4693147736831627, "learning_rate": 4.574548153536806e-08, "loss": 0.5795, "step": 14970 }, { "epoch": 0.96, "grad_norm": 1.5352765772955717, "learning_rate": 4.560569336228338e-08, "loss": 0.5879, "step": 14971 }, { "epoch": 0.96, "grad_norm": 1.5992082761278634, "learning_rate": 4.546611811865498e-08, "loss": 0.6511, "step": 14972 }, { "epoch": 0.96, "grad_norm": 1.8996861288555464, "learning_rate": 4.5326755810482514e-08, "loss": 0.6799, "step": 14973 }, { "epoch": 0.96, "grad_norm": 1.6993967733088402, "learning_rate": 4.518760644375508e-08, "loss": 0.6595, "step": 14974 }, { "epoch": 0.96, "grad_norm": 1.4497061175961428, "learning_rate": 4.504867002445179e-08, "loss": 0.6646, "step": 14975 }, { "epoch": 0.96, "grad_norm": 1.6907172148226168, "learning_rate": 4.4909946558545634e-08, "loss": 0.5708, "step": 14976 }, { "epoch": 0.96, "grad_norm": 1.686227378303974, "learning_rate": 4.477143605199796e-08, "loss": 0.6614, "step": 14977 }, { "epoch": 0.96, "grad_norm": 1.2820842230387643, "learning_rate": 4.463313851076123e-08, "loss": 0.7219, "step": 14978 }, { "epoch": 0.96, "grad_norm": 1.752318345832646, "learning_rate": 4.449505394078013e-08, "loss": 0.719, "step": 14979 }, { "epoch": 0.96, "grad_norm": 1.6213732349517438, "learning_rate": 4.435718234798825e-08, "loss": 0.815, "step": 14980 }, { "epoch": 0.96, "grad_norm": 1.5153829323717296, "learning_rate": 4.4219523738311396e-08, "loss": 0.6843, "step": 14981 }, { "epoch": 0.96, "grad_norm": 1.2141348586087946, "learning_rate": 4.408207811766596e-08, "loss": 0.6454, "step": 14982 }, { "epoch": 0.96, "grad_norm": 1.832405270564642, "learning_rate": 4.3944845491958874e-08, "loss": 0.6348, "step": 14983 }, { "epoch": 0.96, "grad_norm": 1.4693300319917206, "learning_rate": 4.3807825867088204e-08, "loss": 0.5998, "step": 14984 }, { "epoch": 0.96, "grad_norm": 1.6097639413143636, "learning_rate": 4.3671019248943126e-08, "loss": 0.6236, "step": 14985 }, { "epoch": 0.96, "grad_norm": 2.095349573814773, "learning_rate": 4.353442564340282e-08, "loss": 0.619, "step": 14986 }, { "epoch": 0.96, "grad_norm": 1.3501684215457235, "learning_rate": 4.3398045056337604e-08, "loss": 0.5324, "step": 14987 }, { "epoch": 0.96, "grad_norm": 1.6531728788817355, "learning_rate": 4.326187749360944e-08, "loss": 0.6304, "step": 14988 }, { "epoch": 0.96, "grad_norm": 1.5752868115171483, "learning_rate": 4.312592296106977e-08, "loss": 0.645, "step": 14989 }, { "epoch": 0.96, "grad_norm": 1.552432170342405, "learning_rate": 4.2990181464562795e-08, "loss": 0.671, "step": 14990 }, { "epoch": 0.96, "grad_norm": 1.5720969440574821, "learning_rate": 4.285465300992164e-08, "loss": 0.6206, "step": 14991 }, { "epoch": 0.96, "grad_norm": 1.7476697593219277, "learning_rate": 4.271933760297109e-08, "loss": 0.6739, "step": 14992 }, { "epoch": 0.96, "grad_norm": 1.7281853593528795, "learning_rate": 4.258423524952648e-08, "loss": 0.6436, "step": 14993 }, { "epoch": 0.96, "grad_norm": 1.4442564349593907, "learning_rate": 4.2449345955394295e-08, "loss": 0.6631, "step": 14994 }, { "epoch": 0.96, "grad_norm": 1.4296265421387477, "learning_rate": 4.231466972637211e-08, "loss": 0.6773, "step": 14995 }, { "epoch": 0.96, "grad_norm": 1.614848697091138, "learning_rate": 4.2180206568248064e-08, "loss": 0.728, "step": 14996 }, { "epoch": 0.96, "grad_norm": 1.4697691992600987, "learning_rate": 4.2045956486800877e-08, "loss": 0.6236, "step": 14997 }, { "epoch": 0.96, "grad_norm": 1.5728810022217352, "learning_rate": 4.191191948780038e-08, "loss": 0.709, "step": 14998 }, { "epoch": 0.96, "grad_norm": 1.537208468350439, "learning_rate": 4.177809557700752e-08, "loss": 0.6791, "step": 14999 }, { "epoch": 0.96, "grad_norm": 1.1565058963072996, "learning_rate": 4.164448476017269e-08, "loss": 0.783, "step": 15000 }, { "epoch": 0.96, "grad_norm": 1.220793768982837, "learning_rate": 4.1511087043039635e-08, "loss": 0.6212, "step": 15001 }, { "epoch": 0.96, "grad_norm": 1.6421166415122155, "learning_rate": 4.1377902431340434e-08, "loss": 0.6083, "step": 15002 }, { "epoch": 0.96, "grad_norm": 1.4200912084674087, "learning_rate": 4.12449309307994e-08, "loss": 0.6593, "step": 15003 }, { "epoch": 0.96, "grad_norm": 1.272841199401204, "learning_rate": 4.111217254713196e-08, "loss": 0.6133, "step": 15004 }, { "epoch": 0.96, "grad_norm": 1.6698009664371114, "learning_rate": 4.097962728604299e-08, "loss": 0.6414, "step": 15005 }, { "epoch": 0.96, "grad_norm": 1.0242647991330243, "learning_rate": 4.0847295153229603e-08, "loss": 0.6049, "step": 15006 }, { "epoch": 0.96, "grad_norm": 1.6941060047301102, "learning_rate": 4.0715176154378366e-08, "loss": 0.6383, "step": 15007 }, { "epoch": 0.96, "grad_norm": 1.7255881903088008, "learning_rate": 4.0583270295168066e-08, "loss": 0.604, "step": 15008 }, { "epoch": 0.96, "grad_norm": 1.4106813196769552, "learning_rate": 4.04515775812675e-08, "loss": 0.6308, "step": 15009 }, { "epoch": 0.96, "grad_norm": 1.386021884467142, "learning_rate": 4.0320098018337136e-08, "loss": 0.5869, "step": 15010 }, { "epoch": 0.96, "grad_norm": 1.5616718917987282, "learning_rate": 4.018883161202691e-08, "loss": 0.6088, "step": 15011 }, { "epoch": 0.96, "grad_norm": 1.3601838505181845, "learning_rate": 4.0057778367978974e-08, "loss": 0.6046, "step": 15012 }, { "epoch": 0.96, "grad_norm": 1.6377188981098783, "learning_rate": 3.992693829182548e-08, "loss": 0.6321, "step": 15013 }, { "epoch": 0.96, "grad_norm": 1.5228440868574227, "learning_rate": 3.979631138918916e-08, "loss": 0.7076, "step": 15014 }, { "epoch": 0.96, "grad_norm": 1.5163878629008243, "learning_rate": 3.966589766568496e-08, "loss": 0.6128, "step": 15015 }, { "epoch": 0.96, "grad_norm": 1.0927592428730577, "learning_rate": 3.9535697126917296e-08, "loss": 0.6927, "step": 15016 }, { "epoch": 0.96, "grad_norm": 1.5118103314098943, "learning_rate": 3.940570977848168e-08, "loss": 0.5875, "step": 15017 }, { "epoch": 0.96, "grad_norm": 1.6884757320616905, "learning_rate": 3.9275935625965325e-08, "loss": 0.6929, "step": 15018 }, { "epoch": 0.96, "grad_norm": 1.8185489575601423, "learning_rate": 3.914637467494542e-08, "loss": 0.6368, "step": 15019 }, { "epoch": 0.96, "grad_norm": 2.1787425001224383, "learning_rate": 3.90170269309903e-08, "loss": 0.6206, "step": 15020 }, { "epoch": 0.96, "grad_norm": 1.6109634339649719, "learning_rate": 3.888789239965885e-08, "loss": 0.6629, "step": 15021 }, { "epoch": 0.96, "grad_norm": 1.6115349086200486, "learning_rate": 3.875897108650051e-08, "loss": 0.6485, "step": 15022 }, { "epoch": 0.96, "grad_norm": 2.068160238252765, "learning_rate": 3.863026299705697e-08, "loss": 0.7512, "step": 15023 }, { "epoch": 0.96, "grad_norm": 1.2178753657462016, "learning_rate": 3.850176813685935e-08, "loss": 0.7389, "step": 15024 }, { "epoch": 0.96, "grad_norm": 1.9569771373000229, "learning_rate": 3.8373486511429916e-08, "loss": 0.6581, "step": 15025 }, { "epoch": 0.96, "grad_norm": 1.48926949194542, "learning_rate": 3.824541812628258e-08, "loss": 0.6062, "step": 15026 }, { "epoch": 0.96, "grad_norm": 1.6468122372937932, "learning_rate": 3.811756298692126e-08, "loss": 0.6433, "step": 15027 }, { "epoch": 0.96, "grad_norm": 1.59397535394748, "learning_rate": 3.7989921098840476e-08, "loss": 0.6203, "step": 15028 }, { "epoch": 0.96, "grad_norm": 1.7640860635734537, "learning_rate": 3.7862492467526376e-08, "loss": 0.737, "step": 15029 }, { "epoch": 0.96, "grad_norm": 1.6921861938253289, "learning_rate": 3.77352770984557e-08, "loss": 0.6811, "step": 15030 }, { "epoch": 0.96, "grad_norm": 1.3806231883252578, "learning_rate": 3.7608274997095187e-08, "loss": 0.5256, "step": 15031 }, { "epoch": 0.96, "grad_norm": 1.584758071175505, "learning_rate": 3.748148616890379e-08, "loss": 0.6747, "step": 15032 }, { "epoch": 0.96, "grad_norm": 1.5546766331981292, "learning_rate": 3.735491061932994e-08, "loss": 0.68, "step": 15033 }, { "epoch": 0.96, "grad_norm": 1.5810221975469891, "learning_rate": 3.7228548353814844e-08, "loss": 0.6252, "step": 15034 }, { "epoch": 0.96, "grad_norm": 1.4360186714993513, "learning_rate": 3.710239937778803e-08, "loss": 0.6542, "step": 15035 }, { "epoch": 0.96, "grad_norm": 1.7791030785159727, "learning_rate": 3.697646369667185e-08, "loss": 0.6367, "step": 15036 }, { "epoch": 0.96, "grad_norm": 1.6081515809807119, "learning_rate": 3.685074131587863e-08, "loss": 0.6314, "step": 15037 }, { "epoch": 0.96, "grad_norm": 1.5445387926504566, "learning_rate": 3.672523224081126e-08, "loss": 0.6062, "step": 15038 }, { "epoch": 0.96, "grad_norm": 1.544602360653801, "learning_rate": 3.6599936476864325e-08, "loss": 0.5652, "step": 15039 }, { "epoch": 0.96, "grad_norm": 1.5216823646985396, "learning_rate": 3.647485402942241e-08, "loss": 0.6641, "step": 15040 }, { "epoch": 0.96, "grad_norm": 1.469532804666542, "learning_rate": 3.6349984903861214e-08, "loss": 0.7342, "step": 15041 }, { "epoch": 0.96, "grad_norm": 1.7214880073751415, "learning_rate": 3.6225329105548105e-08, "loss": 0.754, "step": 15042 }, { "epoch": 0.96, "grad_norm": 1.4799031020771747, "learning_rate": 3.6100886639839904e-08, "loss": 0.7454, "step": 15043 }, { "epoch": 0.96, "grad_norm": 1.5433030549788374, "learning_rate": 3.597665751208512e-08, "loss": 0.6115, "step": 15044 }, { "epoch": 0.96, "grad_norm": 1.684177176509517, "learning_rate": 3.5852641727622264e-08, "loss": 0.6566, "step": 15045 }, { "epoch": 0.96, "grad_norm": 1.7474248244174777, "learning_rate": 3.5728839291782614e-08, "loss": 0.6095, "step": 15046 }, { "epoch": 0.96, "grad_norm": 1.5668109273125825, "learning_rate": 3.5605250209885256e-08, "loss": 0.6599, "step": 15047 }, { "epoch": 0.96, "grad_norm": 1.7469377851444432, "learning_rate": 3.548187448724316e-08, "loss": 0.6128, "step": 15048 }, { "epoch": 0.96, "grad_norm": 1.4780112791545237, "learning_rate": 3.535871212915765e-08, "loss": 0.5978, "step": 15049 }, { "epoch": 0.96, "grad_norm": 1.585787096034734, "learning_rate": 3.523576314092281e-08, "loss": 0.5367, "step": 15050 }, { "epoch": 0.96, "grad_norm": 1.1711267330040915, "learning_rate": 3.51130275278222e-08, "loss": 0.6261, "step": 15051 }, { "epoch": 0.96, "grad_norm": 1.863255471714871, "learning_rate": 3.4990505295131594e-08, "loss": 0.6219, "step": 15052 }, { "epoch": 0.96, "grad_norm": 1.8233196633651985, "learning_rate": 3.4868196448115675e-08, "loss": 0.6191, "step": 15053 }, { "epoch": 0.96, "grad_norm": 1.4993756619088634, "learning_rate": 3.4746100992031354e-08, "loss": 0.6249, "step": 15054 }, { "epoch": 0.96, "grad_norm": 1.563272794502084, "learning_rate": 3.46242189321272e-08, "loss": 0.7195, "step": 15055 }, { "epoch": 0.96, "grad_norm": 1.5585223847199061, "learning_rate": 3.4502550273639026e-08, "loss": 0.6889, "step": 15056 }, { "epoch": 0.96, "grad_norm": 1.4739490104673159, "learning_rate": 3.4381095021798203e-08, "loss": 0.6409, "step": 15057 }, { "epoch": 0.96, "grad_norm": 1.585790316659333, "learning_rate": 3.4259853181823345e-08, "loss": 0.5739, "step": 15058 }, { "epoch": 0.96, "grad_norm": 1.7399550985193621, "learning_rate": 3.4138824758925826e-08, "loss": 0.6843, "step": 15059 }, { "epoch": 0.96, "grad_norm": 1.7937368286122177, "learning_rate": 3.401800975830705e-08, "loss": 0.6157, "step": 15060 }, { "epoch": 0.96, "grad_norm": 1.5495059486615357, "learning_rate": 3.389740818515841e-08, "loss": 0.6791, "step": 15061 }, { "epoch": 0.96, "grad_norm": 1.8809661629821426, "learning_rate": 3.3777020044664655e-08, "loss": 0.6675, "step": 15062 }, { "epoch": 0.96, "grad_norm": 1.5370998164135925, "learning_rate": 3.3656845341998865e-08, "loss": 0.6744, "step": 15063 }, { "epoch": 0.96, "grad_norm": 1.5220037992286275, "learning_rate": 3.353688408232636e-08, "loss": 0.7066, "step": 15064 }, { "epoch": 0.96, "grad_norm": 1.8092639136696738, "learning_rate": 3.341713627080245e-08, "loss": 0.7492, "step": 15065 }, { "epoch": 0.96, "grad_norm": 1.655899657860056, "learning_rate": 3.3297601912573584e-08, "loss": 0.6635, "step": 15066 }, { "epoch": 0.96, "grad_norm": 1.8162873544897633, "learning_rate": 3.317828101277787e-08, "loss": 0.7335, "step": 15067 }, { "epoch": 0.96, "grad_norm": 1.505394898140278, "learning_rate": 3.305917357654232e-08, "loss": 0.6435, "step": 15068 }, { "epoch": 0.96, "grad_norm": 0.9938111343962013, "learning_rate": 3.2940279608986714e-08, "loss": 0.62, "step": 15069 }, { "epoch": 0.96, "grad_norm": 1.2570332724023838, "learning_rate": 3.282159911522087e-08, "loss": 0.5856, "step": 15070 }, { "epoch": 0.96, "grad_norm": 1.6504331545799866, "learning_rate": 3.270313210034515e-08, "loss": 0.6459, "step": 15071 }, { "epoch": 0.96, "grad_norm": 1.6373269304331555, "learning_rate": 3.2584878569450474e-08, "loss": 0.6429, "step": 15072 }, { "epoch": 0.96, "grad_norm": 1.9321085580384878, "learning_rate": 3.246683852762056e-08, "loss": 0.6216, "step": 15073 }, { "epoch": 0.96, "grad_norm": 1.8860818816041318, "learning_rate": 3.234901197992746e-08, "loss": 0.6536, "step": 15074 }, { "epoch": 0.96, "grad_norm": 1.5183408471717652, "learning_rate": 3.22313989314349e-08, "loss": 0.674, "step": 15075 }, { "epoch": 0.96, "grad_norm": 1.5478280944189102, "learning_rate": 3.211399938719883e-08, "loss": 0.6431, "step": 15076 }, { "epoch": 0.97, "grad_norm": 1.4390460678806625, "learning_rate": 3.199681335226357e-08, "loss": 0.6977, "step": 15077 }, { "epoch": 0.97, "grad_norm": 1.4996555063045074, "learning_rate": 3.1879840831666175e-08, "loss": 0.5727, "step": 15078 }, { "epoch": 0.97, "grad_norm": 1.4528838651605716, "learning_rate": 3.176308183043375e-08, "loss": 0.731, "step": 15079 }, { "epoch": 0.97, "grad_norm": 2.11812897013659, "learning_rate": 3.1646536353584503e-08, "loss": 0.6853, "step": 15080 }, { "epoch": 0.97, "grad_norm": 1.7229267305005773, "learning_rate": 3.1530204406127196e-08, "loss": 0.8003, "step": 15081 }, { "epoch": 0.97, "grad_norm": 1.1498656568277126, "learning_rate": 3.141408599306117e-08, "loss": 0.5957, "step": 15082 }, { "epoch": 0.97, "grad_norm": 1.5577666247321733, "learning_rate": 3.129818111937744e-08, "loss": 0.6715, "step": 15083 }, { "epoch": 0.97, "grad_norm": 1.6328750910427725, "learning_rate": 3.1182489790057555e-08, "loss": 0.6682, "step": 15084 }, { "epoch": 0.97, "grad_norm": 1.5495532850106064, "learning_rate": 3.1067012010073114e-08, "loss": 0.8213, "step": 15085 }, { "epoch": 0.97, "grad_norm": 1.452766212472376, "learning_rate": 3.0951747784387363e-08, "loss": 0.6158, "step": 15086 }, { "epoch": 0.97, "grad_norm": 1.6542310284204138, "learning_rate": 3.0836697117954115e-08, "loss": 0.711, "step": 15087 }, { "epoch": 0.97, "grad_norm": 1.5296442543284976, "learning_rate": 3.072186001571775e-08, "loss": 0.659, "step": 15088 }, { "epoch": 0.97, "grad_norm": 1.4752383028784448, "learning_rate": 3.0607236482613764e-08, "loss": 0.6556, "step": 15089 }, { "epoch": 0.97, "grad_norm": 1.7334019825970988, "learning_rate": 3.049282652356878e-08, "loss": 0.6105, "step": 15090 }, { "epoch": 0.97, "grad_norm": 1.6286423507833994, "learning_rate": 3.037863014349995e-08, "loss": 0.6946, "step": 15091 }, { "epoch": 0.97, "grad_norm": 1.846547831531811, "learning_rate": 3.0264647347315044e-08, "loss": 0.6022, "step": 15092 }, { "epoch": 0.97, "grad_norm": 1.5052685096982437, "learning_rate": 3.0150878139912906e-08, "loss": 0.6767, "step": 15093 }, { "epoch": 0.97, "grad_norm": 1.4605038286275702, "learning_rate": 3.003732252618241e-08, "loss": 0.5686, "step": 15094 }, { "epoch": 0.97, "grad_norm": 1.5938068634314155, "learning_rate": 2.9923980511004645e-08, "loss": 0.7447, "step": 15095 }, { "epoch": 0.97, "grad_norm": 1.470443797628273, "learning_rate": 2.981085209925072e-08, "loss": 0.6581, "step": 15096 }, { "epoch": 0.97, "grad_norm": 1.8868431861985684, "learning_rate": 2.96979372957823e-08, "loss": 0.6707, "step": 15097 }, { "epoch": 0.97, "grad_norm": 1.4945164499796433, "learning_rate": 2.958523610545272e-08, "loss": 0.725, "step": 15098 }, { "epoch": 0.97, "grad_norm": 1.568454836226631, "learning_rate": 2.947274853310589e-08, "loss": 0.6185, "step": 15099 }, { "epoch": 0.97, "grad_norm": 1.574512883479758, "learning_rate": 2.936047458357516e-08, "loss": 0.6496, "step": 15100 }, { "epoch": 0.97, "grad_norm": 1.5171253508291471, "learning_rate": 2.9248414261686674e-08, "loss": 0.659, "step": 15101 }, { "epoch": 0.97, "grad_norm": 1.1402304182854028, "learning_rate": 2.9136567572256024e-08, "loss": 0.6977, "step": 15102 }, { "epoch": 0.97, "grad_norm": 1.2100446477013642, "learning_rate": 2.902493452009103e-08, "loss": 0.6677, "step": 15103 }, { "epoch": 0.97, "grad_norm": 1.4838135454255896, "learning_rate": 2.8913515109988966e-08, "loss": 0.7104, "step": 15104 }, { "epoch": 0.97, "grad_norm": 1.6347541126114584, "learning_rate": 2.8802309346737666e-08, "loss": 0.7182, "step": 15105 }, { "epoch": 0.97, "grad_norm": 2.090527606913522, "learning_rate": 2.86913172351172e-08, "loss": 0.6537, "step": 15106 }, { "epoch": 0.97, "grad_norm": 1.8600164229622407, "learning_rate": 2.8580538779898192e-08, "loss": 0.5554, "step": 15107 }, { "epoch": 0.97, "grad_norm": 1.4635338306797785, "learning_rate": 2.8469973985841283e-08, "loss": 0.6621, "step": 15108 }, { "epoch": 0.97, "grad_norm": 1.364764616976969, "learning_rate": 2.8359622857698223e-08, "loss": 0.5937, "step": 15109 }, { "epoch": 0.97, "grad_norm": 1.8790081937168577, "learning_rate": 2.824948540021133e-08, "loss": 0.5872, "step": 15110 }, { "epoch": 0.97, "grad_norm": 1.5684463273668339, "learning_rate": 2.813956161811515e-08, "loss": 0.7801, "step": 15111 }, { "epoch": 0.97, "grad_norm": 1.6253158953334763, "learning_rate": 2.8029851516132577e-08, "loss": 0.703, "step": 15112 }, { "epoch": 0.97, "grad_norm": 1.551979318649929, "learning_rate": 2.7920355098979835e-08, "loss": 0.5728, "step": 15113 }, { "epoch": 0.97, "grad_norm": 1.6203836996083871, "learning_rate": 2.7811072371362048e-08, "loss": 0.6981, "step": 15114 }, { "epoch": 0.97, "grad_norm": 1.35629304771993, "learning_rate": 2.7702003337977124e-08, "loss": 0.7017, "step": 15115 }, { "epoch": 0.97, "grad_norm": 1.7068296619292114, "learning_rate": 2.7593148003511317e-08, "loss": 0.6194, "step": 15116 }, { "epoch": 0.97, "grad_norm": 1.625073533160285, "learning_rate": 2.748450637264366e-08, "loss": 0.6764, "step": 15117 }, { "epoch": 0.97, "grad_norm": 1.6055803506385824, "learning_rate": 2.7376078450043198e-08, "loss": 0.6558, "step": 15118 }, { "epoch": 0.97, "grad_norm": 1.5214739730776574, "learning_rate": 2.7267864240369533e-08, "loss": 0.6119, "step": 15119 }, { "epoch": 0.97, "grad_norm": 1.4889499067218277, "learning_rate": 2.7159863748274506e-08, "loss": 0.6184, "step": 15120 }, { "epoch": 0.97, "grad_norm": 1.7410792329514397, "learning_rate": 2.7052076978398844e-08, "loss": 0.696, "step": 15121 }, { "epoch": 0.97, "grad_norm": 1.075621099487246, "learning_rate": 2.694450393537551e-08, "loss": 0.6728, "step": 15122 }, { "epoch": 0.97, "grad_norm": 1.4470824728751712, "learning_rate": 2.683714462382747e-08, "loss": 0.6719, "step": 15123 }, { "epoch": 0.97, "grad_norm": 1.736540756019525, "learning_rate": 2.672999904836937e-08, "loss": 0.698, "step": 15124 }, { "epoch": 0.97, "grad_norm": 1.5720803066850526, "learning_rate": 2.6623067213605302e-08, "loss": 0.6633, "step": 15125 }, { "epoch": 0.97, "grad_norm": 0.9868839748732102, "learning_rate": 2.6516349124131037e-08, "loss": 0.6005, "step": 15126 }, { "epoch": 0.97, "grad_norm": 1.3577553393203532, "learning_rate": 2.6409844784533456e-08, "loss": 0.6845, "step": 15127 }, { "epoch": 0.97, "grad_norm": 2.1425264801219948, "learning_rate": 2.6303554199390013e-08, "loss": 0.603, "step": 15128 }, { "epoch": 0.97, "grad_norm": 1.5246856015395378, "learning_rate": 2.6197477373268722e-08, "loss": 0.7092, "step": 15129 }, { "epoch": 0.97, "grad_norm": 1.0176698791426324, "learning_rate": 2.6091614310727597e-08, "loss": 0.5897, "step": 15130 }, { "epoch": 0.97, "grad_norm": 1.6166709423899754, "learning_rate": 2.5985965016318004e-08, "loss": 0.6752, "step": 15131 }, { "epoch": 0.97, "grad_norm": 1.6007702397538266, "learning_rate": 2.5880529494579643e-08, "loss": 0.5962, "step": 15132 }, { "epoch": 0.97, "grad_norm": 1.5193618800595488, "learning_rate": 2.577530775004389e-08, "loss": 0.6602, "step": 15133 }, { "epoch": 0.97, "grad_norm": 1.7586105081733188, "learning_rate": 2.5670299787233788e-08, "loss": 0.7021, "step": 15134 }, { "epoch": 0.97, "grad_norm": 1.4182835935427045, "learning_rate": 2.5565505610660734e-08, "loss": 0.6263, "step": 15135 }, { "epoch": 0.97, "grad_norm": 1.5513087894577382, "learning_rate": 2.5460925224830567e-08, "loss": 0.6346, "step": 15136 }, { "epoch": 0.97, "grad_norm": 1.5814679718513698, "learning_rate": 2.5356558634235806e-08, "loss": 0.6242, "step": 15137 }, { "epoch": 0.97, "grad_norm": 1.4656500727913209, "learning_rate": 2.525240584336397e-08, "loss": 0.6744, "step": 15138 }, { "epoch": 0.97, "grad_norm": 2.9060431486586626, "learning_rate": 2.5148466856689812e-08, "loss": 0.6553, "step": 15139 }, { "epoch": 0.97, "grad_norm": 1.5006718442156657, "learning_rate": 2.504474167868087e-08, "loss": 0.6735, "step": 15140 }, { "epoch": 0.97, "grad_norm": 1.3293472742425523, "learning_rate": 2.4941230313795252e-08, "loss": 0.59, "step": 15141 }, { "epoch": 0.97, "grad_norm": 1.3776793327153416, "learning_rate": 2.4837932766481608e-08, "loss": 0.6118, "step": 15142 }, { "epoch": 0.97, "grad_norm": 1.3794100839954018, "learning_rate": 2.4734849041179176e-08, "loss": 0.6649, "step": 15143 }, { "epoch": 0.97, "grad_norm": 1.5988486652743847, "learning_rate": 2.463197914231885e-08, "loss": 0.6265, "step": 15144 }, { "epoch": 0.97, "grad_norm": 1.6869840472402604, "learning_rate": 2.4529323074320988e-08, "loss": 0.6426, "step": 15145 }, { "epoch": 0.97, "grad_norm": 1.4381510835944793, "learning_rate": 2.4426880841598165e-08, "loss": 0.5306, "step": 15146 }, { "epoch": 0.97, "grad_norm": 1.5928088631181267, "learning_rate": 2.432465244855242e-08, "loss": 0.6058, "step": 15147 }, { "epoch": 0.97, "grad_norm": 1.8446592262230155, "learning_rate": 2.4222637899578015e-08, "loss": 0.6478, "step": 15148 }, { "epoch": 0.97, "grad_norm": 1.494689963069819, "learning_rate": 2.412083719905922e-08, "loss": 0.6595, "step": 15149 }, { "epoch": 0.97, "grad_norm": 1.4615734837950534, "learning_rate": 2.4019250351371427e-08, "loss": 0.7244, "step": 15150 }, { "epoch": 0.97, "grad_norm": 1.5853965635692606, "learning_rate": 2.3917877360879472e-08, "loss": 0.7245, "step": 15151 }, { "epoch": 0.97, "grad_norm": 1.6026587676013861, "learning_rate": 2.3816718231941537e-08, "loss": 0.6859, "step": 15152 }, { "epoch": 0.97, "grad_norm": 1.5296843558249162, "learning_rate": 2.371577296890415e-08, "loss": 0.6785, "step": 15153 }, { "epoch": 0.97, "grad_norm": 1.5869964982259261, "learning_rate": 2.3615041576106613e-08, "loss": 0.6174, "step": 15154 }, { "epoch": 0.97, "grad_norm": 1.3894325079107162, "learning_rate": 2.3514524057877685e-08, "loss": 0.6327, "step": 15155 }, { "epoch": 0.97, "grad_norm": 1.4848293636730565, "learning_rate": 2.3414220418537804e-08, "loss": 0.6134, "step": 15156 }, { "epoch": 0.97, "grad_norm": 1.6635622297090336, "learning_rate": 2.331413066239685e-08, "loss": 0.5857, "step": 15157 }, { "epoch": 0.97, "grad_norm": 1.7299429741715902, "learning_rate": 2.3214254793757497e-08, "loss": 0.6489, "step": 15158 }, { "epoch": 0.97, "grad_norm": 1.029761131401641, "learning_rate": 2.311459281691186e-08, "loss": 0.5918, "step": 15159 }, { "epoch": 0.97, "grad_norm": 1.556608500757093, "learning_rate": 2.301514473614319e-08, "loss": 0.5706, "step": 15160 }, { "epoch": 0.97, "grad_norm": 1.4160578237589512, "learning_rate": 2.2915910555725286e-08, "loss": 0.6775, "step": 15161 }, { "epoch": 0.97, "grad_norm": 1.4327741383193262, "learning_rate": 2.2816890279923064e-08, "loss": 0.6796, "step": 15162 }, { "epoch": 0.97, "grad_norm": 1.7120290674937968, "learning_rate": 2.2718083912992573e-08, "loss": 0.6271, "step": 15163 }, { "epoch": 0.97, "grad_norm": 1.578058356027151, "learning_rate": 2.261949145918041e-08, "loss": 0.6753, "step": 15164 }, { "epoch": 0.97, "grad_norm": 1.6046528033605372, "learning_rate": 2.2521112922723186e-08, "loss": 0.6857, "step": 15165 }, { "epoch": 0.97, "grad_norm": 1.5325450817130648, "learning_rate": 2.2422948307849746e-08, "loss": 0.6819, "step": 15166 }, { "epoch": 0.97, "grad_norm": 1.805492253683114, "learning_rate": 2.2324997618778375e-08, "loss": 0.5345, "step": 15167 }, { "epoch": 0.97, "grad_norm": 1.6015338017548575, "learning_rate": 2.2227260859719047e-08, "loss": 0.6417, "step": 15168 }, { "epoch": 0.97, "grad_norm": 1.4959241265202325, "learning_rate": 2.212973803487284e-08, "loss": 0.6662, "step": 15169 }, { "epoch": 0.97, "grad_norm": 1.4506717996656147, "learning_rate": 2.2032429148429735e-08, "loss": 0.5368, "step": 15170 }, { "epoch": 0.97, "grad_norm": 1.4208335508839212, "learning_rate": 2.1935334204573056e-08, "loss": 0.7724, "step": 15171 }, { "epoch": 0.97, "grad_norm": 1.0674502696709516, "learning_rate": 2.1838453207475574e-08, "loss": 0.6349, "step": 15172 }, { "epoch": 0.97, "grad_norm": 1.890627843658031, "learning_rate": 2.1741786161300628e-08, "loss": 0.6839, "step": 15173 }, { "epoch": 0.97, "grad_norm": 1.5328642401923893, "learning_rate": 2.1645333070203222e-08, "loss": 0.6219, "step": 15174 }, { "epoch": 0.97, "grad_norm": 1.4447406239168865, "learning_rate": 2.1549093938327826e-08, "loss": 0.7353, "step": 15175 }, { "epoch": 0.97, "grad_norm": 8.081132640345205, "learning_rate": 2.145306876981168e-08, "loss": 0.6973, "step": 15176 }, { "epoch": 0.97, "grad_norm": 3.8812711668216617, "learning_rate": 2.135725756878093e-08, "loss": 0.602, "step": 15177 }, { "epoch": 0.97, "grad_norm": 1.492437563600861, "learning_rate": 2.1261660339354505e-08, "loss": 0.5592, "step": 15178 }, { "epoch": 0.97, "grad_norm": 1.5637590598842577, "learning_rate": 2.116627708563912e-08, "loss": 0.6508, "step": 15179 }, { "epoch": 0.97, "grad_norm": 1.5142138023615628, "learning_rate": 2.1071107811735382e-08, "loss": 0.7264, "step": 15180 }, { "epoch": 0.97, "grad_norm": 1.4432976223860647, "learning_rate": 2.0976152521733905e-08, "loss": 0.6378, "step": 15181 }, { "epoch": 0.97, "grad_norm": 1.4162956180211286, "learning_rate": 2.088141121971421e-08, "loss": 0.5951, "step": 15182 }, { "epoch": 0.97, "grad_norm": 1.0626166289173042, "learning_rate": 2.07868839097497e-08, "loss": 0.5458, "step": 15183 }, { "epoch": 0.97, "grad_norm": 2.283711875316876, "learning_rate": 2.0692570595901572e-08, "loss": 0.6591, "step": 15184 }, { "epoch": 0.97, "grad_norm": 1.6565007008346622, "learning_rate": 2.059847128222381e-08, "loss": 0.6901, "step": 15185 }, { "epoch": 0.97, "grad_norm": 2.2762093717156286, "learning_rate": 2.0504585972760394e-08, "loss": 0.5938, "step": 15186 }, { "epoch": 0.97, "grad_norm": 1.730506242723623, "learning_rate": 2.041091467154699e-08, "loss": 0.6214, "step": 15187 }, { "epoch": 0.97, "grad_norm": 1.8925398840598375, "learning_rate": 2.0317457382608706e-08, "loss": 0.7694, "step": 15188 }, { "epoch": 0.97, "grad_norm": 1.4490671918483329, "learning_rate": 2.022421410996234e-08, "loss": 0.6006, "step": 15189 }, { "epoch": 0.97, "grad_norm": 1.5363498593203513, "learning_rate": 2.0131184857615783e-08, "loss": 0.6486, "step": 15190 }, { "epoch": 0.97, "grad_norm": 1.4863597536717845, "learning_rate": 2.0038369629565846e-08, "loss": 0.7154, "step": 15191 }, { "epoch": 0.97, "grad_norm": 2.2712758634670442, "learning_rate": 1.9945768429803226e-08, "loss": 0.6058, "step": 15192 }, { "epoch": 0.97, "grad_norm": 1.6350688248789886, "learning_rate": 1.9853381262306405e-08, "loss": 0.6972, "step": 15193 }, { "epoch": 0.97, "grad_norm": 1.4479000129702169, "learning_rate": 1.976120813104665e-08, "loss": 0.7313, "step": 15194 }, { "epoch": 0.97, "grad_norm": 1.083148932236924, "learning_rate": 1.9669249039985794e-08, "loss": 0.7009, "step": 15195 }, { "epoch": 0.97, "grad_norm": 1.7468256800214437, "learning_rate": 1.957750399307512e-08, "loss": 0.7322, "step": 15196 }, { "epoch": 0.97, "grad_norm": 1.0643957292089172, "learning_rate": 1.9485972994257584e-08, "loss": 0.6713, "step": 15197 }, { "epoch": 0.97, "grad_norm": 1.5639677663472495, "learning_rate": 1.9394656047467818e-08, "loss": 0.5927, "step": 15198 }, { "epoch": 0.97, "grad_norm": 1.515638818497605, "learning_rate": 1.9303553156630462e-08, "loss": 0.6587, "step": 15199 }, { "epoch": 0.97, "grad_norm": 1.7878101702676208, "learning_rate": 1.92126643256596e-08, "loss": 0.6111, "step": 15200 }, { "epoch": 0.97, "grad_norm": 1.6743293482293717, "learning_rate": 1.912198955846323e-08, "loss": 0.7569, "step": 15201 }, { "epoch": 0.97, "grad_norm": 1.2377996664066564, "learning_rate": 1.9031528858936556e-08, "loss": 0.5904, "step": 15202 }, { "epoch": 0.97, "grad_norm": 1.5157814961673088, "learning_rate": 1.894128223096925e-08, "loss": 0.6173, "step": 15203 }, { "epoch": 0.97, "grad_norm": 1.6749672734299592, "learning_rate": 1.885124967843821e-08, "loss": 0.6509, "step": 15204 }, { "epoch": 0.97, "grad_norm": 1.1912357632420578, "learning_rate": 1.8761431205214232e-08, "loss": 0.6511, "step": 15205 }, { "epoch": 0.97, "grad_norm": 1.1295818572823566, "learning_rate": 1.8671826815156448e-08, "loss": 0.6126, "step": 15206 }, { "epoch": 0.97, "grad_norm": 1.750967021291538, "learning_rate": 1.8582436512116776e-08, "loss": 0.5969, "step": 15207 }, { "epoch": 0.97, "grad_norm": 1.2307246227305735, "learning_rate": 1.849326029993659e-08, "loss": 0.5959, "step": 15208 }, { "epoch": 0.97, "grad_norm": 0.9373087515695383, "learning_rate": 1.8404298182447823e-08, "loss": 0.6264, "step": 15209 }, { "epoch": 0.97, "grad_norm": 1.6638226256785678, "learning_rate": 1.8315550163475194e-08, "loss": 0.6379, "step": 15210 }, { "epoch": 0.97, "grad_norm": 1.2909295831407877, "learning_rate": 1.8227016246831764e-08, "loss": 0.6271, "step": 15211 }, { "epoch": 0.97, "grad_norm": 1.4183573413481252, "learning_rate": 1.8138696436323377e-08, "loss": 0.6321, "step": 15212 }, { "epoch": 0.97, "grad_norm": 1.6325601557567269, "learning_rate": 1.8050590735745334e-08, "loss": 0.6082, "step": 15213 }, { "epoch": 0.97, "grad_norm": 1.681660519313713, "learning_rate": 1.79626991488846e-08, "loss": 0.6356, "step": 15214 }, { "epoch": 0.97, "grad_norm": 1.3998015783359108, "learning_rate": 1.7875021679518156e-08, "loss": 0.5372, "step": 15215 }, { "epoch": 0.97, "grad_norm": 1.5892606641298657, "learning_rate": 1.7787558331414655e-08, "loss": 0.7066, "step": 15216 }, { "epoch": 0.97, "grad_norm": 1.6824091937074326, "learning_rate": 1.7700309108332204e-08, "loss": 0.6391, "step": 15217 }, { "epoch": 0.97, "grad_norm": 1.774028778352652, "learning_rate": 1.7613274014021686e-08, "loss": 0.6163, "step": 15218 }, { "epoch": 0.97, "grad_norm": 1.4526732496679307, "learning_rate": 1.7526453052223446e-08, "loss": 0.6473, "step": 15219 }, { "epoch": 0.97, "grad_norm": 1.1860443510938727, "learning_rate": 1.743984622666839e-08, "loss": 0.7125, "step": 15220 }, { "epoch": 0.97, "grad_norm": 1.0891107851555866, "learning_rate": 1.7353453541078534e-08, "loss": 0.6183, "step": 15221 }, { "epoch": 0.97, "grad_norm": 1.6242906673275423, "learning_rate": 1.7267274999168138e-08, "loss": 0.6622, "step": 15222 }, { "epoch": 0.97, "grad_norm": 1.3485280715656682, "learning_rate": 1.7181310604639236e-08, "loss": 0.6226, "step": 15223 }, { "epoch": 0.97, "grad_norm": 1.0163021920168684, "learning_rate": 1.7095560361187758e-08, "loss": 0.6301, "step": 15224 }, { "epoch": 0.97, "grad_norm": 1.5493383468224582, "learning_rate": 1.701002427249854e-08, "loss": 0.5884, "step": 15225 }, { "epoch": 0.97, "grad_norm": 1.5577671186695976, "learning_rate": 1.6924702342247522e-08, "loss": 0.6049, "step": 15226 }, { "epoch": 0.97, "grad_norm": 1.192822815700965, "learning_rate": 1.683959457410178e-08, "loss": 0.6692, "step": 15227 }, { "epoch": 0.97, "grad_norm": 1.8127280856673755, "learning_rate": 1.6754700971719496e-08, "loss": 0.681, "step": 15228 }, { "epoch": 0.97, "grad_norm": 1.6280439705774206, "learning_rate": 1.667002153874886e-08, "loss": 0.7002, "step": 15229 }, { "epoch": 0.97, "grad_norm": 1.4910446298874753, "learning_rate": 1.658555627882974e-08, "loss": 0.621, "step": 15230 }, { "epoch": 0.97, "grad_norm": 1.4854221230522244, "learning_rate": 1.65013051955909e-08, "loss": 0.643, "step": 15231 }, { "epoch": 0.97, "grad_norm": 1.6304136683711405, "learning_rate": 1.6417268292655e-08, "loss": 0.7004, "step": 15232 }, { "epoch": 0.98, "grad_norm": 1.561738348349214, "learning_rate": 1.6333445573632478e-08, "loss": 0.5661, "step": 15233 }, { "epoch": 0.98, "grad_norm": 1.6510087703653928, "learning_rate": 1.624983704212657e-08, "loss": 0.669, "step": 15234 }, { "epoch": 0.98, "grad_norm": 1.6771122127045566, "learning_rate": 1.6166442701730513e-08, "loss": 0.7274, "step": 15235 }, { "epoch": 0.98, "grad_norm": 1.7124657694952687, "learning_rate": 1.6083262556027545e-08, "loss": 0.6225, "step": 15236 }, { "epoch": 0.98, "grad_norm": 1.5774847785429071, "learning_rate": 1.6000296608594257e-08, "loss": 0.6038, "step": 15237 }, { "epoch": 0.98, "grad_norm": 1.5132595860524893, "learning_rate": 1.5917544862995016e-08, "loss": 0.5689, "step": 15238 }, { "epoch": 0.98, "grad_norm": 1.5775149404835076, "learning_rate": 1.5835007322786424e-08, "loss": 0.6829, "step": 15239 }, { "epoch": 0.98, "grad_norm": 1.4418407591751148, "learning_rate": 1.5752683991516195e-08, "loss": 0.7179, "step": 15240 }, { "epoch": 0.98, "grad_norm": 1.3700388392370857, "learning_rate": 1.567057487272261e-08, "loss": 0.6764, "step": 15241 }, { "epoch": 0.98, "grad_norm": 1.7066642900140903, "learning_rate": 1.5588679969933406e-08, "loss": 0.7307, "step": 15242 }, { "epoch": 0.98, "grad_norm": 1.6162757486171029, "learning_rate": 1.5506999286669656e-08, "loss": 0.7293, "step": 15243 }, { "epoch": 0.98, "grad_norm": 1.5184746039345627, "learning_rate": 1.5425532826441326e-08, "loss": 0.652, "step": 15244 }, { "epoch": 0.98, "grad_norm": 1.639362436429485, "learning_rate": 1.534428059274895e-08, "loss": 0.7127, "step": 15245 }, { "epoch": 0.98, "grad_norm": 1.2999058651825623, "learning_rate": 1.526324258908585e-08, "loss": 0.6885, "step": 15246 }, { "epoch": 0.98, "grad_norm": 1.7422847682850353, "learning_rate": 1.5182418818933676e-08, "loss": 0.7664, "step": 15247 }, { "epoch": 0.98, "grad_norm": 1.7004460577189306, "learning_rate": 1.5101809285766877e-08, "loss": 0.6042, "step": 15248 }, { "epoch": 0.98, "grad_norm": 0.9924627133902726, "learning_rate": 1.502141399304935e-08, "loss": 0.5688, "step": 15249 }, { "epoch": 0.98, "grad_norm": 1.819596751907995, "learning_rate": 1.4941232944237217e-08, "loss": 0.7292, "step": 15250 }, { "epoch": 0.98, "grad_norm": 1.6831741842329468, "learning_rate": 1.4861266142775498e-08, "loss": 0.6615, "step": 15251 }, { "epoch": 0.98, "grad_norm": 1.5300429234817565, "learning_rate": 1.4781513592100893e-08, "loss": 0.6316, "step": 15252 }, { "epoch": 0.98, "grad_norm": 1.6797812814920037, "learning_rate": 1.4701975295641768e-08, "loss": 0.6578, "step": 15253 }, { "epoch": 0.98, "grad_norm": 1.4926302804327551, "learning_rate": 1.46226512568165e-08, "loss": 0.6838, "step": 15254 }, { "epoch": 0.98, "grad_norm": 1.8994099132986748, "learning_rate": 1.4543541479033473e-08, "loss": 0.6619, "step": 15255 }, { "epoch": 0.98, "grad_norm": 1.422597708115535, "learning_rate": 1.4464645965693303e-08, "loss": 0.6073, "step": 15256 }, { "epoch": 0.98, "grad_norm": 1.5825166503555197, "learning_rate": 1.4385964720187162e-08, "loss": 0.5918, "step": 15257 }, { "epoch": 0.98, "grad_norm": 1.5648676554611303, "learning_rate": 1.4307497745895127e-08, "loss": 0.704, "step": 15258 }, { "epoch": 0.98, "grad_norm": 1.6692509415349812, "learning_rate": 1.4229245046190609e-08, "loss": 0.6203, "step": 15259 }, { "epoch": 0.98, "grad_norm": 1.838209163305894, "learning_rate": 1.415120662443703e-08, "loss": 0.6376, "step": 15260 }, { "epoch": 0.98, "grad_norm": 2.7107669219090025, "learning_rate": 1.4073382483987819e-08, "loss": 0.6912, "step": 15261 }, { "epoch": 0.98, "grad_norm": 1.8007120131254288, "learning_rate": 1.399577262818752e-08, "loss": 0.6003, "step": 15262 }, { "epoch": 0.98, "grad_norm": 1.4940563765625805, "learning_rate": 1.3918377060371802e-08, "loss": 0.6312, "step": 15263 }, { "epoch": 0.98, "grad_norm": 1.455548249113469, "learning_rate": 1.3841195783867444e-08, "loss": 0.6668, "step": 15264 }, { "epoch": 0.98, "grad_norm": 1.4755110798396898, "learning_rate": 1.3764228801990686e-08, "loss": 0.6971, "step": 15265 }, { "epoch": 0.98, "grad_norm": 1.9896366176043518, "learning_rate": 1.3687476118049991e-08, "loss": 0.6496, "step": 15266 }, { "epoch": 0.98, "grad_norm": 1.9622082782490695, "learning_rate": 1.3610937735344387e-08, "loss": 0.6031, "step": 15267 }, { "epoch": 0.98, "grad_norm": 1.6308809103031978, "learning_rate": 1.3534613657162354e-08, "loss": 0.6983, "step": 15268 }, { "epoch": 0.98, "grad_norm": 1.5635007245004102, "learning_rate": 1.3458503886784603e-08, "loss": 0.7598, "step": 15269 }, { "epoch": 0.98, "grad_norm": 1.2333269952396806, "learning_rate": 1.3382608427482402e-08, "loss": 0.6299, "step": 15270 }, { "epoch": 0.98, "grad_norm": 1.4395221036278434, "learning_rate": 1.3306927282517034e-08, "loss": 0.6673, "step": 15271 }, { "epoch": 0.98, "grad_norm": 1.4111858251165208, "learning_rate": 1.3231460455141453e-08, "loss": 0.6319, "step": 15272 }, { "epoch": 0.98, "grad_norm": 1.6061802296416114, "learning_rate": 1.3156207948599176e-08, "loss": 0.6483, "step": 15273 }, { "epoch": 0.98, "grad_norm": 1.7812551059722244, "learning_rate": 1.308116976612428e-08, "loss": 0.6028, "step": 15274 }, { "epoch": 0.98, "grad_norm": 1.0764918681753175, "learning_rate": 1.3006345910941964e-08, "loss": 0.7295, "step": 15275 }, { "epoch": 0.98, "grad_norm": 1.5645979527918275, "learning_rate": 1.2931736386267435e-08, "loss": 0.724, "step": 15276 }, { "epoch": 0.98, "grad_norm": 1.7685660929252771, "learning_rate": 1.2857341195308126e-08, "loss": 0.8019, "step": 15277 }, { "epoch": 0.98, "grad_norm": 1.6220174686035096, "learning_rate": 1.278316034126037e-08, "loss": 0.6961, "step": 15278 }, { "epoch": 0.98, "grad_norm": 1.7369877942643672, "learning_rate": 1.2709193827312727e-08, "loss": 0.7145, "step": 15279 }, { "epoch": 0.98, "grad_norm": 1.1354011095128216, "learning_rate": 1.2635441656644876e-08, "loss": 0.6323, "step": 15280 }, { "epoch": 0.98, "grad_norm": 1.669423467317068, "learning_rate": 1.2561903832424837e-08, "loss": 0.57, "step": 15281 }, { "epoch": 0.98, "grad_norm": 1.5649231489531659, "learning_rate": 1.2488580357815083e-08, "loss": 0.6576, "step": 15282 }, { "epoch": 0.98, "grad_norm": 1.375872548739422, "learning_rate": 1.2415471235965315e-08, "loss": 0.6641, "step": 15283 }, { "epoch": 0.98, "grad_norm": 1.3952134289615727, "learning_rate": 1.2342576470018575e-08, "loss": 0.6107, "step": 15284 }, { "epoch": 0.98, "grad_norm": 1.3557393878934358, "learning_rate": 1.2269896063107356e-08, "loss": 0.661, "step": 15285 }, { "epoch": 0.98, "grad_norm": 1.4726606512392686, "learning_rate": 1.2197430018354717e-08, "loss": 0.6316, "step": 15286 }, { "epoch": 0.98, "grad_norm": 1.2660586553365905, "learning_rate": 1.2125178338876498e-08, "loss": 0.6026, "step": 15287 }, { "epoch": 0.98, "grad_norm": 1.537965101145211, "learning_rate": 1.2053141027776883e-08, "loss": 0.5918, "step": 15288 }, { "epoch": 0.98, "grad_norm": 9.415224615482215, "learning_rate": 1.1981318088152283e-08, "loss": 0.7565, "step": 15289 }, { "epoch": 0.98, "grad_norm": 1.330329581379305, "learning_rate": 1.190970952308912e-08, "loss": 0.5833, "step": 15290 }, { "epoch": 0.98, "grad_norm": 1.7272157960619139, "learning_rate": 1.1838315335664929e-08, "loss": 0.6908, "step": 15291 }, { "epoch": 0.98, "grad_norm": 1.3802604350793164, "learning_rate": 1.1767135528948925e-08, "loss": 0.6511, "step": 15292 }, { "epoch": 0.98, "grad_norm": 1.7972844321925474, "learning_rate": 1.1696170105999772e-08, "loss": 0.7314, "step": 15293 }, { "epoch": 0.98, "grad_norm": 1.5692201569460866, "learning_rate": 1.1625419069867249e-08, "loss": 0.6367, "step": 15294 }, { "epoch": 0.98, "grad_norm": 1.0380917355477528, "learning_rate": 1.1554882423591706e-08, "loss": 0.6776, "step": 15295 }, { "epoch": 0.98, "grad_norm": 2.2603446089684254, "learning_rate": 1.1484560170205716e-08, "loss": 0.5839, "step": 15296 }, { "epoch": 0.98, "grad_norm": 1.4430978398610645, "learning_rate": 1.141445231273075e-08, "loss": 0.6806, "step": 15297 }, { "epoch": 0.98, "grad_norm": 2.589527615823839, "learning_rate": 1.1344558854179955e-08, "loss": 0.5891, "step": 15298 }, { "epoch": 0.98, "grad_norm": 1.454524597232205, "learning_rate": 1.1274879797558148e-08, "loss": 0.6825, "step": 15299 }, { "epoch": 0.98, "grad_norm": 1.9105748333652217, "learning_rate": 1.120541514585849e-08, "loss": 0.7148, "step": 15300 }, { "epoch": 0.98, "grad_norm": 1.1410221841399444, "learning_rate": 1.1136164902067481e-08, "loss": 0.6552, "step": 15301 }, { "epoch": 0.98, "grad_norm": 1.5998969516289607, "learning_rate": 1.1067129069161076e-08, "loss": 0.5823, "step": 15302 }, { "epoch": 0.98, "grad_norm": 1.4782142219115977, "learning_rate": 1.0998307650106344e-08, "loss": 0.6664, "step": 15303 }, { "epoch": 0.98, "grad_norm": 1.606572349776119, "learning_rate": 1.092970064786092e-08, "loss": 0.6919, "step": 15304 }, { "epoch": 0.98, "grad_norm": 1.7507410799601497, "learning_rate": 1.0861308065373556e-08, "loss": 0.7354, "step": 15305 }, { "epoch": 0.98, "grad_norm": 2.226233078483368, "learning_rate": 1.0793129905583566e-08, "loss": 0.7496, "step": 15306 }, { "epoch": 0.98, "grad_norm": 1.5351688499739806, "learning_rate": 1.072516617142083e-08, "loss": 0.7087, "step": 15307 }, { "epoch": 0.98, "grad_norm": 1.6358916714518488, "learning_rate": 1.0657416865806902e-08, "loss": 0.6351, "step": 15308 }, { "epoch": 0.98, "grad_norm": 1.5145842663369014, "learning_rate": 1.0589881991652784e-08, "loss": 0.6305, "step": 15309 }, { "epoch": 0.98, "grad_norm": 2.3139545674693247, "learning_rate": 1.052256155186171e-08, "loss": 0.7045, "step": 15310 }, { "epoch": 0.98, "grad_norm": 1.379778800860068, "learning_rate": 1.0455455549326366e-08, "loss": 0.5626, "step": 15311 }, { "epoch": 0.98, "grad_norm": 1.3618010089012744, "learning_rate": 1.038856398693111e-08, "loss": 0.673, "step": 15312 }, { "epoch": 0.98, "grad_norm": 1.4934068107133014, "learning_rate": 1.0321886867550868e-08, "loss": 0.6358, "step": 15313 }, { "epoch": 0.98, "grad_norm": 1.5536489608109232, "learning_rate": 1.0255424194050567e-08, "loss": 0.6118, "step": 15314 }, { "epoch": 0.98, "grad_norm": 1.5889096930180875, "learning_rate": 1.0189175969287923e-08, "loss": 0.6508, "step": 15315 }, { "epoch": 0.98, "grad_norm": 1.6347655427955794, "learning_rate": 1.0123142196108993e-08, "loss": 0.631, "step": 15316 }, { "epoch": 0.98, "grad_norm": 2.6835521427769993, "learning_rate": 1.0057322877352616e-08, "loss": 0.6319, "step": 15317 }, { "epoch": 0.98, "grad_norm": 1.6063268819390788, "learning_rate": 9.991718015847085e-09, "loss": 0.6539, "step": 15318 }, { "epoch": 0.98, "grad_norm": 2.1921685141301994, "learning_rate": 9.926327614411813e-09, "loss": 0.6031, "step": 15319 }, { "epoch": 0.98, "grad_norm": 1.388294023871651, "learning_rate": 9.861151675857884e-09, "loss": 0.6119, "step": 15320 }, { "epoch": 0.98, "grad_norm": 1.7778805686328836, "learning_rate": 9.796190202985834e-09, "loss": 0.6134, "step": 15321 }, { "epoch": 0.98, "grad_norm": 1.6169336034844612, "learning_rate": 9.73144319858732e-09, "loss": 0.7455, "step": 15322 }, { "epoch": 0.98, "grad_norm": 1.8603900741846973, "learning_rate": 9.666910665445673e-09, "loss": 0.6731, "step": 15323 }, { "epoch": 0.98, "grad_norm": 1.1466200618314069, "learning_rate": 9.602592606333672e-09, "loss": 0.6717, "step": 15324 }, { "epoch": 0.98, "grad_norm": 1.4726044143164856, "learning_rate": 9.538489024016328e-09, "loss": 0.6172, "step": 15325 }, { "epoch": 0.98, "grad_norm": 1.4667938576581314, "learning_rate": 9.474599921248662e-09, "loss": 0.648, "step": 15326 }, { "epoch": 0.98, "grad_norm": 1.0826541943508015, "learning_rate": 9.410925300775697e-09, "loss": 0.593, "step": 15327 }, { "epoch": 0.98, "grad_norm": 1.6414808562603365, "learning_rate": 9.347465165334135e-09, "loss": 0.6871, "step": 15328 }, { "epoch": 0.98, "grad_norm": 1.3599622280866344, "learning_rate": 9.284219517652348e-09, "loss": 0.6715, "step": 15329 }, { "epoch": 0.98, "grad_norm": 1.4685454463106142, "learning_rate": 9.221188360447609e-09, "loss": 0.62, "step": 15330 }, { "epoch": 0.98, "grad_norm": 1.4809655343313817, "learning_rate": 9.158371696428859e-09, "loss": 0.592, "step": 15331 }, { "epoch": 0.98, "grad_norm": 1.7539937000164874, "learning_rate": 9.095769528296716e-09, "loss": 0.6134, "step": 15332 }, { "epoch": 0.98, "grad_norm": 1.704500450761371, "learning_rate": 9.033381858740697e-09, "loss": 0.7197, "step": 15333 }, { "epoch": 0.98, "grad_norm": 1.9131350843599912, "learning_rate": 8.971208690442545e-09, "loss": 0.6255, "step": 15334 }, { "epoch": 0.98, "grad_norm": 1.5008346339843905, "learning_rate": 8.909250026074013e-09, "loss": 0.6386, "step": 15335 }, { "epoch": 0.98, "grad_norm": 1.6214394839978066, "learning_rate": 8.847505868298522e-09, "loss": 0.6657, "step": 15336 }, { "epoch": 0.98, "grad_norm": 1.2151137346729748, "learning_rate": 8.785976219768954e-09, "loss": 0.733, "step": 15337 }, { "epoch": 0.98, "grad_norm": 1.8853107975089154, "learning_rate": 8.724661083130414e-09, "loss": 0.7836, "step": 15338 }, { "epoch": 0.98, "grad_norm": 1.638421884264375, "learning_rate": 8.663560461018016e-09, "loss": 0.6436, "step": 15339 }, { "epoch": 0.98, "grad_norm": 1.7469472232655807, "learning_rate": 8.60267435605744e-09, "loss": 0.64, "step": 15340 }, { "epoch": 0.98, "grad_norm": 1.0044709875880462, "learning_rate": 8.542002770865477e-09, "loss": 0.6135, "step": 15341 }, { "epoch": 0.98, "grad_norm": 2.0410764780946202, "learning_rate": 8.481545708049488e-09, "loss": 0.6671, "step": 15342 }, { "epoch": 0.98, "grad_norm": 1.345082260271201, "learning_rate": 8.421303170208505e-09, "loss": 0.6427, "step": 15343 }, { "epoch": 0.98, "grad_norm": 1.0429941163351286, "learning_rate": 8.36127515993046e-09, "loss": 0.6208, "step": 15344 }, { "epoch": 0.98, "grad_norm": 1.4707081654082395, "learning_rate": 8.301461679796619e-09, "loss": 0.6387, "step": 15345 }, { "epoch": 0.98, "grad_norm": 1.3815470224319535, "learning_rate": 8.241862732376593e-09, "loss": 0.5649, "step": 15346 }, { "epoch": 0.98, "grad_norm": 1.8906647240400294, "learning_rate": 8.182478320232223e-09, "loss": 0.6196, "step": 15347 }, { "epoch": 0.98, "grad_norm": 1.5782911573189349, "learning_rate": 8.123308445915912e-09, "loss": 0.6313, "step": 15348 }, { "epoch": 0.98, "grad_norm": 1.5355462160489328, "learning_rate": 8.06435311197007e-09, "loss": 0.6404, "step": 15349 }, { "epoch": 0.98, "grad_norm": 1.546455707601278, "learning_rate": 8.005612320929335e-09, "loss": 0.7087, "step": 15350 }, { "epoch": 0.98, "grad_norm": 1.7805449230296124, "learning_rate": 7.947086075317246e-09, "loss": 0.6884, "step": 15351 }, { "epoch": 0.98, "grad_norm": 1.5798757888183588, "learning_rate": 7.888774377650122e-09, "loss": 0.641, "step": 15352 }, { "epoch": 0.98, "grad_norm": 1.5957648761116365, "learning_rate": 7.830677230433181e-09, "loss": 0.6985, "step": 15353 }, { "epoch": 0.98, "grad_norm": 1.65814113446154, "learning_rate": 7.772794636163872e-09, "loss": 0.6637, "step": 15354 }, { "epoch": 0.98, "grad_norm": 1.4285084086635835, "learning_rate": 7.715126597329648e-09, "loss": 0.6624, "step": 15355 }, { "epoch": 0.98, "grad_norm": 1.5294890347375265, "learning_rate": 7.657673116409081e-09, "loss": 0.6934, "step": 15356 }, { "epoch": 0.98, "grad_norm": 1.5494888888561866, "learning_rate": 7.600434195871864e-09, "loss": 0.6803, "step": 15357 }, { "epoch": 0.98, "grad_norm": 1.7491776581805376, "learning_rate": 7.54340983817714e-09, "loss": 0.7443, "step": 15358 }, { "epoch": 0.98, "grad_norm": 1.8553134993075726, "learning_rate": 7.486600045775728e-09, "loss": 0.675, "step": 15359 }, { "epoch": 0.98, "grad_norm": 1.052952224477633, "learning_rate": 7.430004821110115e-09, "loss": 0.7135, "step": 15360 }, { "epoch": 0.98, "grad_norm": 1.5899481903038442, "learning_rate": 7.373624166611137e-09, "loss": 0.6827, "step": 15361 }, { "epoch": 0.98, "grad_norm": 1.1828236391006892, "learning_rate": 7.317458084704076e-09, "loss": 0.6933, "step": 15362 }, { "epoch": 0.98, "grad_norm": 1.7066948001834403, "learning_rate": 7.261506577800892e-09, "loss": 0.6364, "step": 15363 }, { "epoch": 0.98, "grad_norm": 1.6237745225972466, "learning_rate": 7.2057696483068816e-09, "loss": 0.7522, "step": 15364 }, { "epoch": 0.98, "grad_norm": 1.3928662958372375, "learning_rate": 7.1502472986179075e-09, "loss": 0.5935, "step": 15365 }, { "epoch": 0.98, "grad_norm": 1.6881674945185745, "learning_rate": 7.094939531119838e-09, "loss": 0.6926, "step": 15366 }, { "epoch": 0.98, "grad_norm": 1.4914288625816376, "learning_rate": 7.039846348189105e-09, "loss": 0.6173, "step": 15367 }, { "epoch": 0.98, "grad_norm": 1.6398765853677135, "learning_rate": 6.984967752194927e-09, "loss": 0.7156, "step": 15368 }, { "epoch": 0.98, "grad_norm": 1.0392721693987887, "learning_rate": 6.93030374549486e-09, "loss": 0.6105, "step": 15369 }, { "epoch": 0.98, "grad_norm": 1.442789374499475, "learning_rate": 6.875854330438136e-09, "loss": 0.6294, "step": 15370 }, { "epoch": 0.98, "grad_norm": 1.8340708008684319, "learning_rate": 6.8216195093656624e-09, "loss": 0.6637, "step": 15371 }, { "epoch": 0.98, "grad_norm": 1.5359797841910048, "learning_rate": 6.76759928460724e-09, "loss": 0.5869, "step": 15372 }, { "epoch": 0.98, "grad_norm": 1.6553460974333345, "learning_rate": 6.713793658486012e-09, "loss": 0.6863, "step": 15373 }, { "epoch": 0.98, "grad_norm": 1.725751067723852, "learning_rate": 6.6602026333129065e-09, "loss": 0.7331, "step": 15374 }, { "epoch": 0.98, "grad_norm": 1.5482805341062815, "learning_rate": 6.606826211392192e-09, "loss": 0.6084, "step": 15375 }, { "epoch": 0.98, "grad_norm": 1.629881788093788, "learning_rate": 6.553664395017589e-09, "loss": 0.6611, "step": 15376 }, { "epoch": 0.98, "grad_norm": 1.5921011124206281, "learning_rate": 6.5007171864733824e-09, "loss": 0.7027, "step": 15377 }, { "epoch": 0.98, "grad_norm": 1.424465114282664, "learning_rate": 6.4479845880360824e-09, "loss": 0.6549, "step": 15378 }, { "epoch": 0.98, "grad_norm": 1.5477165384742058, "learning_rate": 6.395466601971101e-09, "loss": 0.6364, "step": 15379 }, { "epoch": 0.98, "grad_norm": 1.5936296644266679, "learning_rate": 6.3431632305360754e-09, "loss": 0.6387, "step": 15380 }, { "epoch": 0.98, "grad_norm": 1.7034177814111848, "learning_rate": 6.291074475978653e-09, "loss": 0.6212, "step": 15381 }, { "epoch": 0.98, "grad_norm": 1.4858339461256111, "learning_rate": 6.239200340537599e-09, "loss": 0.5617, "step": 15382 }, { "epoch": 0.98, "grad_norm": 1.4708194241869799, "learning_rate": 6.187540826442795e-09, "loss": 0.6483, "step": 15383 }, { "epoch": 0.98, "grad_norm": 1.8331072958377055, "learning_rate": 6.136095935913578e-09, "loss": 0.6852, "step": 15384 }, { "epoch": 0.98, "grad_norm": 1.3724613028706167, "learning_rate": 6.084865671162066e-09, "loss": 0.6395, "step": 15385 }, { "epoch": 0.98, "grad_norm": 1.7845947615036601, "learning_rate": 6.033850034388722e-09, "loss": 0.6947, "step": 15386 }, { "epoch": 0.98, "grad_norm": 1.3761454600531613, "learning_rate": 5.983049027786791e-09, "loss": 0.611, "step": 15387 }, { "epoch": 0.98, "grad_norm": 1.4519547233085472, "learning_rate": 5.932462653539528e-09, "loss": 0.6042, "step": 15388 }, { "epoch": 0.98, "grad_norm": 1.7252853596804476, "learning_rate": 5.882090913821303e-09, "loss": 0.5943, "step": 15389 }, { "epoch": 0.99, "grad_norm": 1.065148055282506, "learning_rate": 5.831933810796497e-09, "loss": 0.7243, "step": 15390 }, { "epoch": 0.99, "grad_norm": 1.9876635907746638, "learning_rate": 5.781991346621163e-09, "loss": 0.7412, "step": 15391 }, { "epoch": 0.99, "grad_norm": 1.5658284199505785, "learning_rate": 5.7322635234413615e-09, "loss": 0.6181, "step": 15392 }, { "epoch": 0.99, "grad_norm": 1.8885741181890983, "learning_rate": 5.682750343394827e-09, "loss": 0.6823, "step": 15393 }, { "epoch": 0.99, "grad_norm": 1.3904671956053787, "learning_rate": 5.633451808608747e-09, "loss": 0.6235, "step": 15394 }, { "epoch": 0.99, "grad_norm": 1.1077779480983019, "learning_rate": 5.584367921202538e-09, "loss": 0.6109, "step": 15395 }, { "epoch": 0.99, "grad_norm": 1.5931250717937089, "learning_rate": 5.535498683285623e-09, "loss": 0.7038, "step": 15396 }, { "epoch": 0.99, "grad_norm": 1.9479956222372719, "learning_rate": 5.486844096957988e-09, "loss": 0.6713, "step": 15397 }, { "epoch": 0.99, "grad_norm": 1.5782944846086373, "learning_rate": 5.43840416431074e-09, "loss": 0.6997, "step": 15398 }, { "epoch": 0.99, "grad_norm": 1.5217014010004979, "learning_rate": 5.390178887426101e-09, "loss": 0.6789, "step": 15399 }, { "epoch": 0.99, "grad_norm": 1.6834274746922973, "learning_rate": 5.342168268376302e-09, "loss": 0.6378, "step": 15400 }, { "epoch": 0.99, "grad_norm": 1.203591538996202, "learning_rate": 5.294372309224693e-09, "loss": 0.6913, "step": 15401 }, { "epoch": 0.99, "grad_norm": 1.5354045071565892, "learning_rate": 5.246791012025743e-09, "loss": 0.6407, "step": 15402 }, { "epoch": 0.99, "grad_norm": 1.4919833308439723, "learning_rate": 5.19942437882448e-09, "loss": 0.7046, "step": 15403 }, { "epoch": 0.99, "grad_norm": 1.7050320525870426, "learning_rate": 5.1522724116565005e-09, "loss": 0.6412, "step": 15404 }, { "epoch": 0.99, "grad_norm": 1.5038884353930824, "learning_rate": 5.10533511254796e-09, "loss": 0.5391, "step": 15405 }, { "epoch": 0.99, "grad_norm": 1.9583184943779266, "learning_rate": 5.05861248351669e-09, "loss": 0.8083, "step": 15406 }, { "epoch": 0.99, "grad_norm": 1.4144321493854464, "learning_rate": 5.012104526569972e-09, "loss": 0.6196, "step": 15407 }, { "epoch": 0.99, "grad_norm": 1.5915403411348175, "learning_rate": 4.965811243707319e-09, "loss": 0.6119, "step": 15408 }, { "epoch": 0.99, "grad_norm": 1.6317598858467424, "learning_rate": 4.919732636918251e-09, "loss": 0.6434, "step": 15409 }, { "epoch": 0.99, "grad_norm": 1.6761037572418918, "learning_rate": 4.873868708182849e-09, "loss": 0.6742, "step": 15410 }, { "epoch": 0.99, "grad_norm": 1.1688157514032593, "learning_rate": 4.8282194594723164e-09, "loss": 0.6912, "step": 15411 }, { "epoch": 0.99, "grad_norm": 1.3804608642776874, "learning_rate": 4.782784892748971e-09, "loss": 0.6582, "step": 15412 }, { "epoch": 0.99, "grad_norm": 1.4609129399237404, "learning_rate": 4.737565009964584e-09, "loss": 0.6264, "step": 15413 }, { "epoch": 0.99, "grad_norm": 1.9339285220502387, "learning_rate": 4.692559813063713e-09, "loss": 0.5843, "step": 15414 }, { "epoch": 0.99, "grad_norm": 1.6085275808033048, "learning_rate": 4.647769303979255e-09, "loss": 0.6562, "step": 15415 }, { "epoch": 0.99, "grad_norm": 1.7743110531764978, "learning_rate": 4.6031934846380024e-09, "loss": 0.6589, "step": 15416 }, { "epoch": 0.99, "grad_norm": 1.8143713171185638, "learning_rate": 4.558832356953979e-09, "loss": 0.6125, "step": 15417 }, { "epoch": 0.99, "grad_norm": 1.375813762485061, "learning_rate": 4.514685922834549e-09, "loss": 0.6381, "step": 15418 }, { "epoch": 0.99, "grad_norm": 1.1503179926613376, "learning_rate": 4.470754184177084e-09, "loss": 0.6497, "step": 15419 }, { "epoch": 0.99, "grad_norm": 2.2259083062876357, "learning_rate": 4.427037142869517e-09, "loss": 0.6658, "step": 15420 }, { "epoch": 0.99, "grad_norm": 1.5309361885380433, "learning_rate": 4.383534800790901e-09, "loss": 0.6743, "step": 15421 }, { "epoch": 0.99, "grad_norm": 1.6082206073155119, "learning_rate": 4.3402471598102956e-09, "loss": 0.6634, "step": 15422 }, { "epoch": 0.99, "grad_norm": 1.312212669234216, "learning_rate": 4.297174221788991e-09, "loss": 0.6501, "step": 15423 }, { "epoch": 0.99, "grad_norm": 1.5684752573135645, "learning_rate": 4.254315988577729e-09, "loss": 0.6681, "step": 15424 }, { "epoch": 0.99, "grad_norm": 1.57736353610651, "learning_rate": 4.211672462018368e-09, "loss": 0.6841, "step": 15425 }, { "epoch": 0.99, "grad_norm": 1.6222304515991404, "learning_rate": 4.169243643943333e-09, "loss": 0.7181, "step": 15426 }, { "epoch": 0.99, "grad_norm": 1.7035136067103693, "learning_rate": 4.127029536177274e-09, "loss": 0.7094, "step": 15427 }, { "epoch": 0.99, "grad_norm": 1.4692082484531352, "learning_rate": 4.085030140533186e-09, "loss": 0.6326, "step": 15428 }, { "epoch": 0.99, "grad_norm": 1.498412012967483, "learning_rate": 4.043245458816847e-09, "loss": 0.6313, "step": 15429 }, { "epoch": 0.99, "grad_norm": 1.748451281317523, "learning_rate": 4.001675492823487e-09, "loss": 0.6667, "step": 15430 }, { "epoch": 0.99, "grad_norm": 1.5169361519832074, "learning_rate": 3.960320244340565e-09, "loss": 0.6498, "step": 15431 }, { "epoch": 0.99, "grad_norm": 1.531721785032541, "learning_rate": 3.919179715144439e-09, "loss": 0.6751, "step": 15432 }, { "epoch": 0.99, "grad_norm": 1.6740517403550235, "learning_rate": 3.878253907004248e-09, "loss": 0.6118, "step": 15433 }, { "epoch": 0.99, "grad_norm": 1.5955769399368063, "learning_rate": 3.837542821678031e-09, "loss": 0.6156, "step": 15434 }, { "epoch": 0.99, "grad_norm": 1.62462880358925, "learning_rate": 3.797046460916054e-09, "loss": 0.6555, "step": 15435 }, { "epoch": 0.99, "grad_norm": 1.1772478268774067, "learning_rate": 3.7567648264585924e-09, "loss": 0.7105, "step": 15436 }, { "epoch": 0.99, "grad_norm": 2.017722028952393, "learning_rate": 3.716697920036483e-09, "loss": 0.6568, "step": 15437 }, { "epoch": 0.99, "grad_norm": 1.7133811749122019, "learning_rate": 3.6768457433727924e-09, "loss": 0.7174, "step": 15438 }, { "epoch": 0.99, "grad_norm": 1.4865869950929063, "learning_rate": 3.637208298178929e-09, "loss": 0.6902, "step": 15439 }, { "epoch": 0.99, "grad_norm": 1.480691253882336, "learning_rate": 3.597785586159086e-09, "loss": 0.7129, "step": 15440 }, { "epoch": 0.99, "grad_norm": 1.7032560906918388, "learning_rate": 3.558577609007463e-09, "loss": 0.6644, "step": 15441 }, { "epoch": 0.99, "grad_norm": 1.5777938549924884, "learning_rate": 3.519584368409379e-09, "loss": 0.8106, "step": 15442 }, { "epoch": 0.99, "grad_norm": 1.1320573618880627, "learning_rate": 3.480805866040715e-09, "loss": 0.695, "step": 15443 }, { "epoch": 0.99, "grad_norm": 1.6893199158738066, "learning_rate": 3.4422421035679167e-09, "loss": 0.6191, "step": 15444 }, { "epoch": 0.99, "grad_norm": 1.787369817118066, "learning_rate": 3.403893082647991e-09, "loss": 0.7333, "step": 15445 }, { "epoch": 0.99, "grad_norm": 1.0765342911303108, "learning_rate": 3.36575880492962e-09, "loss": 0.6924, "step": 15446 }, { "epoch": 0.99, "grad_norm": 1.5954242856782368, "learning_rate": 3.3278392720514917e-09, "loss": 0.7477, "step": 15447 }, { "epoch": 0.99, "grad_norm": 1.5337707880402855, "learning_rate": 3.2901344856434147e-09, "loss": 0.6057, "step": 15448 }, { "epoch": 0.99, "grad_norm": 1.8744439392803471, "learning_rate": 3.252644447325759e-09, "loss": 0.7574, "step": 15449 }, { "epoch": 0.99, "grad_norm": 1.4083318891058614, "learning_rate": 3.2153691587094583e-09, "loss": 0.6438, "step": 15450 }, { "epoch": 0.99, "grad_norm": 1.5491948079699744, "learning_rate": 3.1783086213976743e-09, "loss": 0.6323, "step": 15451 }, { "epoch": 0.99, "grad_norm": 1.3486259699575414, "learning_rate": 3.1414628369819123e-09, "loss": 0.6342, "step": 15452 }, { "epoch": 0.99, "grad_norm": 1.6914757684038353, "learning_rate": 3.10483180704646e-09, "loss": 0.6288, "step": 15453 }, { "epoch": 0.99, "grad_norm": 1.5011472258629492, "learning_rate": 3.0684155331650587e-09, "loss": 0.6752, "step": 15454 }, { "epoch": 0.99, "grad_norm": 1.6169651036633537, "learning_rate": 3.0322140169036784e-09, "loss": 0.6566, "step": 15455 }, { "epoch": 0.99, "grad_norm": 1.9118982720766473, "learning_rate": 2.996227259817186e-09, "loss": 0.6401, "step": 15456 }, { "epoch": 0.99, "grad_norm": 1.5022014166077773, "learning_rate": 2.9604552634532325e-09, "loss": 0.6002, "step": 15457 }, { "epoch": 0.99, "grad_norm": 1.5979976585456417, "learning_rate": 2.9248980293483664e-09, "loss": 0.6579, "step": 15458 }, { "epoch": 0.99, "grad_norm": 1.4794420445030971, "learning_rate": 2.88955555903081e-09, "loss": 0.6612, "step": 15459 }, { "epoch": 0.99, "grad_norm": 1.4824314460903212, "learning_rate": 2.854427854019903e-09, "loss": 0.5777, "step": 15460 }, { "epoch": 0.99, "grad_norm": 2.1902316005767593, "learning_rate": 2.81951491582555e-09, "loss": 0.5994, "step": 15461 }, { "epoch": 0.99, "grad_norm": 2.0380826759322175, "learning_rate": 2.7848167459476605e-09, "loss": 0.6177, "step": 15462 }, { "epoch": 0.99, "grad_norm": 1.7952475099443428, "learning_rate": 2.7503333458778205e-09, "loss": 0.712, "step": 15463 }, { "epoch": 0.99, "grad_norm": 1.5381532366563135, "learning_rate": 2.716064717098177e-09, "loss": 0.6439, "step": 15464 }, { "epoch": 0.99, "grad_norm": 1.5254214329095803, "learning_rate": 2.682010861080886e-09, "loss": 0.6446, "step": 15465 }, { "epoch": 0.99, "grad_norm": 1.5026058980416308, "learning_rate": 2.6481717792903316e-09, "loss": 0.7202, "step": 15466 }, { "epoch": 0.99, "grad_norm": 1.2948623505177168, "learning_rate": 2.614547473180351e-09, "loss": 0.6174, "step": 15467 }, { "epoch": 0.99, "grad_norm": 1.0866087972034675, "learning_rate": 2.5811379441964547e-09, "loss": 0.6729, "step": 15468 }, { "epoch": 0.99, "grad_norm": 1.4855375574122995, "learning_rate": 2.5479431937736055e-09, "loss": 0.6722, "step": 15469 }, { "epoch": 0.99, "grad_norm": 1.597724003592761, "learning_rate": 2.5149632233395503e-09, "loss": 0.6309, "step": 15470 }, { "epoch": 0.99, "grad_norm": 2.220842725692472, "learning_rate": 2.482198034310934e-09, "loss": 0.7014, "step": 15471 }, { "epoch": 0.99, "grad_norm": 1.8002577373026867, "learning_rate": 2.44964762809663e-09, "loss": 0.6346, "step": 15472 }, { "epoch": 0.99, "grad_norm": 1.8268229000379388, "learning_rate": 2.417312006094963e-09, "loss": 0.7003, "step": 15473 }, { "epoch": 0.99, "grad_norm": 1.664850742180112, "learning_rate": 2.3851911696959327e-09, "loss": 0.5624, "step": 15474 }, { "epoch": 0.99, "grad_norm": 1.780572612775106, "learning_rate": 2.353285120279547e-09, "loss": 0.6956, "step": 15475 }, { "epoch": 0.99, "grad_norm": 1.5544944120389088, "learning_rate": 2.321593859218041e-09, "loss": 0.7241, "step": 15476 }, { "epoch": 0.99, "grad_norm": 1.7907106516597844, "learning_rate": 2.290117387872548e-09, "loss": 0.6764, "step": 15477 }, { "epoch": 0.99, "grad_norm": 1.5358545720945576, "learning_rate": 2.258855707595875e-09, "loss": 0.6824, "step": 15478 }, { "epoch": 0.99, "grad_norm": 1.591264644687052, "learning_rate": 2.227808819732502e-09, "loss": 0.5685, "step": 15479 }, { "epoch": 0.99, "grad_norm": 1.472196440264914, "learning_rate": 2.196976725615807e-09, "loss": 0.6579, "step": 15480 }, { "epoch": 0.99, "grad_norm": 2.041649131080073, "learning_rate": 2.166359426570841e-09, "loss": 0.7062, "step": 15481 }, { "epoch": 0.99, "grad_norm": 1.8208295689069722, "learning_rate": 2.1359569239143283e-09, "loss": 0.7177, "step": 15482 }, { "epoch": 0.99, "grad_norm": 1.4096336430395484, "learning_rate": 2.105769218952447e-09, "loss": 0.6986, "step": 15483 }, { "epoch": 0.99, "grad_norm": 1.5239762721334567, "learning_rate": 2.075796312982492e-09, "loss": 0.5586, "step": 15484 }, { "epoch": 0.99, "grad_norm": 1.7263961209583996, "learning_rate": 2.0460382072928775e-09, "loss": 0.6705, "step": 15485 }, { "epoch": 0.99, "grad_norm": 1.4451774332554659, "learning_rate": 2.016494903162025e-09, "loss": 0.6664, "step": 15486 }, { "epoch": 0.99, "grad_norm": 1.5592804075946811, "learning_rate": 1.98716640186003e-09, "loss": 0.6837, "step": 15487 }, { "epoch": 0.99, "grad_norm": 1.714582292801114, "learning_rate": 1.9580527046475506e-09, "loss": 0.6152, "step": 15488 }, { "epoch": 0.99, "grad_norm": 1.7656433744391633, "learning_rate": 1.929153812775808e-09, "loss": 0.7082, "step": 15489 }, { "epoch": 0.99, "grad_norm": 1.5475705607001196, "learning_rate": 1.900469727486587e-09, "loss": 0.6556, "step": 15490 }, { "epoch": 0.99, "grad_norm": 1.5012005939194926, "learning_rate": 1.8720004500122347e-09, "loss": 0.6416, "step": 15491 }, { "epoch": 0.99, "grad_norm": 1.3745103055646521, "learning_rate": 1.8437459815773274e-09, "loss": 0.6291, "step": 15492 }, { "epoch": 0.99, "grad_norm": 2.0534952802879554, "learning_rate": 1.8157063233953387e-09, "loss": 0.7549, "step": 15493 }, { "epoch": 0.99, "grad_norm": 1.397206341085702, "learning_rate": 1.787881476671971e-09, "loss": 0.5557, "step": 15494 }, { "epoch": 0.99, "grad_norm": 1.1160126910726205, "learning_rate": 1.7602714426023792e-09, "loss": 0.6227, "step": 15495 }, { "epoch": 0.99, "grad_norm": 1.413855977480795, "learning_rate": 1.732876222373947e-09, "loss": 0.6442, "step": 15496 }, { "epoch": 0.99, "grad_norm": 1.6587266363602455, "learning_rate": 1.7056958171635108e-09, "loss": 0.6208, "step": 15497 }, { "epoch": 0.99, "grad_norm": 1.0926602265085508, "learning_rate": 1.67873022813958e-09, "loss": 0.5894, "step": 15498 }, { "epoch": 0.99, "grad_norm": 1.1754720910625038, "learning_rate": 1.6519794564606728e-09, "loss": 0.6645, "step": 15499 }, { "epoch": 0.99, "grad_norm": 1.4920062056593988, "learning_rate": 1.6254435032764248e-09, "loss": 0.5857, "step": 15500 }, { "epoch": 0.99, "grad_norm": 1.4575662598375736, "learning_rate": 1.5991223697281454e-09, "loss": 0.7043, "step": 15501 }, { "epoch": 0.99, "grad_norm": 1.4108172350289478, "learning_rate": 1.5730160569460418e-09, "loss": 0.632, "step": 15502 }, { "epoch": 0.99, "grad_norm": 1.5513065043294134, "learning_rate": 1.5471245660531042e-09, "loss": 0.6939, "step": 15503 }, { "epoch": 0.99, "grad_norm": 1.5637988446821764, "learning_rate": 1.5214478981612212e-09, "loss": 0.707, "step": 15504 }, { "epoch": 0.99, "grad_norm": 1.5295351445484133, "learning_rate": 1.4959860543739546e-09, "loss": 0.5846, "step": 15505 }, { "epoch": 0.99, "grad_norm": 1.5838790041889874, "learning_rate": 1.4707390357865391e-09, "loss": 0.7037, "step": 15506 }, { "epoch": 0.99, "grad_norm": 1.5312039354519829, "learning_rate": 1.4457068434831078e-09, "loss": 0.7716, "step": 15507 }, { "epoch": 0.99, "grad_norm": 1.934605475075228, "learning_rate": 1.4208894785394666e-09, "loss": 0.65, "step": 15508 }, { "epoch": 0.99, "grad_norm": 1.2839828068679933, "learning_rate": 1.3962869420230951e-09, "loss": 0.6254, "step": 15509 }, { "epoch": 0.99, "grad_norm": 1.5460128995805367, "learning_rate": 1.3718992349903704e-09, "loss": 0.6254, "step": 15510 }, { "epoch": 0.99, "grad_norm": 1.4517633618460724, "learning_rate": 1.3477263584904533e-09, "loss": 0.7089, "step": 15511 }, { "epoch": 0.99, "grad_norm": 1.2697714265547062, "learning_rate": 1.323768313560847e-09, "loss": 0.5608, "step": 15512 }, { "epoch": 0.99, "grad_norm": 1.6337329897323856, "learning_rate": 1.3000251012323939e-09, "loss": 0.6579, "step": 15513 }, { "epoch": 0.99, "grad_norm": 1.285944817388756, "learning_rate": 1.2764967225253888e-09, "loss": 0.6459, "step": 15514 }, { "epoch": 0.99, "grad_norm": 1.522678877777395, "learning_rate": 1.2531831784506898e-09, "loss": 0.6567, "step": 15515 }, { "epoch": 0.99, "grad_norm": 1.4226798131699216, "learning_rate": 1.2300844700097181e-09, "loss": 0.6206, "step": 15516 }, { "epoch": 0.99, "grad_norm": 1.187168750064359, "learning_rate": 1.2072005981966783e-09, "loss": 0.7106, "step": 15517 }, { "epoch": 0.99, "grad_norm": 1.7551040355307352, "learning_rate": 1.1845315639935628e-09, "loss": 0.5897, "step": 15518 }, { "epoch": 0.99, "grad_norm": 1.5259522102797431, "learning_rate": 1.1620773683757025e-09, "loss": 0.6173, "step": 15519 }, { "epoch": 0.99, "grad_norm": 2.022077568777148, "learning_rate": 1.1398380123078812e-09, "loss": 0.6771, "step": 15520 }, { "epoch": 0.99, "grad_norm": 1.6479132947368638, "learning_rate": 1.1178134967454456e-09, "loss": 0.6708, "step": 15521 }, { "epoch": 0.99, "grad_norm": 1.9550300527672984, "learning_rate": 1.0960038226354165e-09, "loss": 0.6486, "step": 15522 }, { "epoch": 0.99, "grad_norm": 1.5524478950803071, "learning_rate": 1.0744089909153765e-09, "loss": 0.6825, "step": 15523 }, { "epoch": 0.99, "grad_norm": 1.6978666330732142, "learning_rate": 1.0530290025123623e-09, "loss": 0.7244, "step": 15524 }, { "epoch": 0.99, "grad_norm": 1.5522748241053925, "learning_rate": 1.0318638583467489e-09, "loss": 0.7746, "step": 15525 }, { "epoch": 0.99, "grad_norm": 1.617545065825013, "learning_rate": 1.0109135593266983e-09, "loss": 0.6065, "step": 15526 }, { "epoch": 0.99, "grad_norm": 1.4658633070328129, "learning_rate": 9.901781063531568e-10, "loss": 0.6861, "step": 15527 }, { "epoch": 0.99, "grad_norm": 1.638271052472636, "learning_rate": 9.696575003176334e-10, "loss": 0.5977, "step": 15528 }, { "epoch": 0.99, "grad_norm": 1.6503494983434381, "learning_rate": 9.493517421022003e-10, "loss": 0.7518, "step": 15529 }, { "epoch": 0.99, "grad_norm": 1.4707600501148679, "learning_rate": 9.292608325789376e-10, "loss": 0.6858, "step": 15530 }, { "epoch": 0.99, "grad_norm": 1.8310183630963144, "learning_rate": 9.093847726110439e-10, "loss": 0.7394, "step": 15531 }, { "epoch": 0.99, "grad_norm": 1.543924668686868, "learning_rate": 8.897235630539458e-10, "loss": 0.6515, "step": 15532 }, { "epoch": 0.99, "grad_norm": 1.5787790175743275, "learning_rate": 8.70277204751413e-10, "loss": 0.6744, "step": 15533 }, { "epoch": 0.99, "grad_norm": 1.5451347319225388, "learning_rate": 8.510456985399985e-10, "loss": 0.649, "step": 15534 }, { "epoch": 0.99, "grad_norm": 1.621298362769302, "learning_rate": 8.320290452462632e-10, "loss": 0.6676, "step": 15535 }, { "epoch": 0.99, "grad_norm": 1.653697171729579, "learning_rate": 8.132272456867762e-10, "loss": 0.6831, "step": 15536 }, { "epoch": 0.99, "grad_norm": 1.665218465265674, "learning_rate": 7.946403006703351e-10, "loss": 0.5911, "step": 15537 }, { "epoch": 0.99, "grad_norm": 1.5211440698380563, "learning_rate": 7.762682109951902e-10, "loss": 0.6609, "step": 15538 }, { "epoch": 0.99, "grad_norm": 1.4003899322647593, "learning_rate": 7.581109774512651e-10, "loss": 0.5901, "step": 15539 }, { "epoch": 0.99, "grad_norm": 1.092605039607523, "learning_rate": 7.401686008190467e-10, "loss": 0.6333, "step": 15540 }, { "epoch": 0.99, "grad_norm": 1.5639733977490375, "learning_rate": 7.224410818695848e-10, "loss": 0.6874, "step": 15541 }, { "epoch": 0.99, "grad_norm": 2.2306598820136516, "learning_rate": 7.049284213644925e-10, "loss": 0.668, "step": 15542 }, { "epoch": 0.99, "grad_norm": 2.7268953382428927, "learning_rate": 6.876306200565008e-10, "loss": 0.772, "step": 15543 }, { "epoch": 0.99, "grad_norm": 1.6803202607803862, "learning_rate": 6.705476786894593e-10, "loss": 0.6145, "step": 15544 }, { "epoch": 0.99, "grad_norm": 1.5317258951007755, "learning_rate": 6.536795979966703e-10, "loss": 0.5216, "step": 15545 }, { "epoch": 1.0, "grad_norm": 1.5452002104533107, "learning_rate": 6.370263787042196e-10, "loss": 0.627, "step": 15546 }, { "epoch": 1.0, "grad_norm": 1.9353534031971487, "learning_rate": 6.20588021527091e-10, "loss": 0.6876, "step": 15547 }, { "epoch": 1.0, "grad_norm": 1.4609423834655517, "learning_rate": 6.043645271719411e-10, "loss": 0.6878, "step": 15548 }, { "epoch": 1.0, "grad_norm": 1.6865034790221314, "learning_rate": 5.883558963359903e-10, "loss": 0.7098, "step": 15549 }, { "epoch": 1.0, "grad_norm": 1.6177424258732342, "learning_rate": 5.725621297075768e-10, "loss": 0.6415, "step": 15550 }, { "epoch": 1.0, "grad_norm": 1.502717222754447, "learning_rate": 5.569832279644915e-10, "loss": 0.8181, "step": 15551 }, { "epoch": 1.0, "grad_norm": 1.5957880111761718, "learning_rate": 5.416191917778646e-10, "loss": 0.5879, "step": 15552 }, { "epoch": 1.0, "grad_norm": 1.7407621896845942, "learning_rate": 5.264700218066133e-10, "loss": 0.5959, "step": 15553 }, { "epoch": 1.0, "grad_norm": 1.8218636970410527, "learning_rate": 5.115357187024383e-10, "loss": 0.6357, "step": 15554 }, { "epoch": 1.0, "grad_norm": 1.4599723283059585, "learning_rate": 4.968162831070489e-10, "loss": 0.6788, "step": 15555 }, { "epoch": 1.0, "grad_norm": 1.7738872477949996, "learning_rate": 4.823117156532719e-10, "loss": 0.7324, "step": 15556 }, { "epoch": 1.0, "grad_norm": 1.5771531046685272, "learning_rate": 4.680220169639427e-10, "loss": 0.6525, "step": 15557 }, { "epoch": 1.0, "grad_norm": 1.3507938782792974, "learning_rate": 4.539471876535695e-10, "loss": 0.6075, "step": 15558 }, { "epoch": 1.0, "grad_norm": 1.32421584420197, "learning_rate": 4.4008722832722397e-10, "loss": 0.5547, "step": 15559 }, { "epoch": 1.0, "grad_norm": 1.7078983188249162, "learning_rate": 4.264421395805407e-10, "loss": 0.6454, "step": 15560 }, { "epoch": 1.0, "grad_norm": 1.642327870841675, "learning_rate": 4.1301192199971753e-10, "loss": 0.6515, "step": 15561 }, { "epoch": 1.0, "grad_norm": 1.5222042715039084, "learning_rate": 3.9979657616207037e-10, "loss": 0.7008, "step": 15562 }, { "epoch": 1.0, "grad_norm": 1.7702771209594677, "learning_rate": 3.867961026354783e-10, "loss": 0.7127, "step": 15563 }, { "epoch": 1.0, "grad_norm": 2.317207297784229, "learning_rate": 3.740105019789386e-10, "loss": 0.6174, "step": 15564 }, { "epoch": 1.0, "grad_norm": 1.5867711461083878, "learning_rate": 3.6143977474201175e-10, "loss": 0.6158, "step": 15565 }, { "epoch": 1.0, "grad_norm": 1.8103061214703695, "learning_rate": 3.4908392146426606e-10, "loss": 0.7275, "step": 15566 }, { "epoch": 1.0, "grad_norm": 1.78533069358823, "learning_rate": 3.369429426769433e-10, "loss": 0.5581, "step": 15567 }, { "epoch": 1.0, "grad_norm": 1.6146419264316263, "learning_rate": 3.250168389024033e-10, "loss": 0.5649, "step": 15568 }, { "epoch": 1.0, "grad_norm": 2.060214471888526, "learning_rate": 3.1330561065301413e-10, "loss": 0.7171, "step": 15569 }, { "epoch": 1.0, "grad_norm": 1.5054427079890935, "learning_rate": 3.0180925843170674e-10, "loss": 0.6634, "step": 15570 }, { "epoch": 1.0, "grad_norm": 1.5395900323330625, "learning_rate": 2.9052778273308546e-10, "loss": 0.6668, "step": 15571 }, { "epoch": 1.0, "grad_norm": 1.4945965180288507, "learning_rate": 2.794611840417627e-10, "loss": 0.6764, "step": 15572 }, { "epoch": 1.0, "grad_norm": 2.903022802379486, "learning_rate": 2.686094628329139e-10, "loss": 0.6748, "step": 15573 }, { "epoch": 1.0, "grad_norm": 1.549739318986098, "learning_rate": 2.579726195739429e-10, "loss": 0.7211, "step": 15574 }, { "epoch": 1.0, "grad_norm": 1.6365785008277673, "learning_rate": 2.475506547211515e-10, "loss": 0.6349, "step": 15575 }, { "epoch": 1.0, "grad_norm": 2.1080212308084527, "learning_rate": 2.373435687225145e-10, "loss": 0.7064, "step": 15576 }, { "epoch": 1.0, "grad_norm": 1.5105626768279352, "learning_rate": 2.2735136201712527e-10, "loss": 0.6531, "step": 15577 }, { "epoch": 1.0, "grad_norm": 2.880485338120201, "learning_rate": 2.1757403503408492e-10, "loss": 0.6273, "step": 15578 }, { "epoch": 1.0, "grad_norm": 1.3847442089890987, "learning_rate": 2.080115881936129e-10, "loss": 0.6034, "step": 15579 }, { "epoch": 1.0, "grad_norm": 1.858671778082277, "learning_rate": 1.9866402190704682e-10, "loss": 0.5948, "step": 15580 }, { "epoch": 1.0, "grad_norm": 1.4893154749555726, "learning_rate": 1.895313365757323e-10, "loss": 0.6742, "step": 15581 }, { "epoch": 1.0, "grad_norm": 1.4979644414588549, "learning_rate": 1.8061353259213322e-10, "loss": 0.5983, "step": 15582 }, { "epoch": 1.0, "grad_norm": 1.533582482720828, "learning_rate": 1.7191061033983157e-10, "loss": 0.6404, "step": 15583 }, { "epoch": 1.0, "grad_norm": 1.085576883586723, "learning_rate": 1.634225701929726e-10, "loss": 0.6794, "step": 15584 }, { "epoch": 1.0, "grad_norm": 1.5694820334730872, "learning_rate": 1.5514941251570937e-10, "loss": 0.5773, "step": 15585 }, { "epoch": 1.0, "grad_norm": 1.4893031078843422, "learning_rate": 1.470911376644235e-10, "loss": 0.7164, "step": 15586 }, { "epoch": 1.0, "grad_norm": 1.5101500414301452, "learning_rate": 1.392477459843944e-10, "loss": 0.608, "step": 15587 }, { "epoch": 1.0, "grad_norm": 1.7547848187977604, "learning_rate": 1.3161923781424002e-10, "loss": 0.6679, "step": 15588 }, { "epoch": 1.0, "grad_norm": 1.1649765785062791, "learning_rate": 1.2420561348036598e-10, "loss": 0.6054, "step": 15589 }, { "epoch": 1.0, "grad_norm": 1.7308876911074098, "learning_rate": 1.1700687330196137e-10, "loss": 0.6834, "step": 15590 }, { "epoch": 1.0, "grad_norm": 1.5584348183722143, "learning_rate": 1.1002301758822331e-10, "loss": 0.5934, "step": 15591 }, { "epoch": 1.0, "grad_norm": 1.7053651037966855, "learning_rate": 1.0325404664002225e-10, "loss": 0.6964, "step": 15592 }, { "epoch": 1.0, "grad_norm": 1.5044855844865166, "learning_rate": 9.66999607471264e-11, "loss": 0.6603, "step": 15593 }, { "epoch": 1.0, "grad_norm": 1.661771895421125, "learning_rate": 9.03607601920875e-11, "loss": 0.712, "step": 15594 }, { "epoch": 1.0, "grad_norm": 1.60891784155606, "learning_rate": 8.423644524691021e-11, "loss": 0.5564, "step": 15595 }, { "epoch": 1.0, "grad_norm": 1.482864240849518, "learning_rate": 7.83270161752725e-11, "loss": 0.5901, "step": 15596 }, { "epoch": 1.0, "grad_norm": 1.5456812137662408, "learning_rate": 7.263247323086032e-11, "loss": 0.607, "step": 15597 }, { "epoch": 1.0, "grad_norm": 1.56058462171749, "learning_rate": 6.715281665847784e-11, "loss": 0.5818, "step": 15598 }, { "epoch": 1.0, "grad_norm": 1.6453686042617781, "learning_rate": 6.188804669349236e-11, "loss": 0.6923, "step": 15599 }, { "epoch": 1.0, "grad_norm": 1.6355792130840086, "learning_rate": 5.683816356183425e-11, "loss": 0.6646, "step": 15600 }, { "epoch": 1.0, "grad_norm": 1.4097067662457474, "learning_rate": 5.200316748110723e-11, "loss": 0.6228, "step": 15601 }, { "epoch": 1.0, "grad_norm": 1.517523364823851, "learning_rate": 4.738305865947812e-11, "loss": 0.723, "step": 15602 }, { "epoch": 1.0, "grad_norm": 1.3690704598435661, "learning_rate": 4.297783729456662e-11, "loss": 0.6415, "step": 15603 }, { "epoch": 1.0, "grad_norm": 1.5754941465879198, "learning_rate": 3.8787503576220854e-11, "loss": 0.5925, "step": 15604 }, { "epoch": 1.0, "grad_norm": 1.8000877203415235, "learning_rate": 3.481205768429696e-11, "loss": 0.621, "step": 15605 }, { "epoch": 1.0, "grad_norm": 1.741065363499767, "learning_rate": 3.105149978976929e-11, "loss": 0.7576, "step": 15606 }, { "epoch": 1.0, "grad_norm": 1.1880701670269653, "learning_rate": 2.7505830054175287e-11, "loss": 0.6656, "step": 15607 }, { "epoch": 1.0, "grad_norm": 1.928820548912361, "learning_rate": 2.4175048630170617e-11, "loss": 0.6674, "step": 15608 }, { "epoch": 1.0, "grad_norm": 1.7449432928743558, "learning_rate": 2.105915566041894e-11, "loss": 0.7856, "step": 15609 }, { "epoch": 1.0, "grad_norm": 1.5076852364763496, "learning_rate": 1.8158151279812354e-11, "loss": 0.7003, "step": 15610 }, { "epoch": 1.0, "grad_norm": 1.210988959059246, "learning_rate": 1.5472035611585613e-11, "loss": 0.6505, "step": 15611 }, { "epoch": 1.0, "grad_norm": 1.5645902195858064, "learning_rate": 1.3000808772312134e-11, "loss": 0.702, "step": 15612 }, { "epoch": 1.0, "grad_norm": 1.6420240513141375, "learning_rate": 1.0744470868018219e-11, "loss": 0.6375, "step": 15613 }, { "epoch": 1.0, "grad_norm": 1.4963276413178943, "learning_rate": 8.70302199529327e-12, "loss": 0.6034, "step": 15614 }, { "epoch": 1.0, "grad_norm": 1.7419898881297107, "learning_rate": 6.876462241844906e-12, "loss": 0.7693, "step": 15615 }, { "epoch": 1.0, "grad_norm": 1.757006428568812, "learning_rate": 5.264791687054072e-12, "loss": 0.6151, "step": 15616 }, { "epoch": 1.0, "grad_norm": 1.7085073703170883, "learning_rate": 3.868010399199484e-12, "loss": 0.7305, "step": 15617 }, { "epoch": 1.0, "grad_norm": 1.486585617563616, "learning_rate": 2.686118438788299e-12, "loss": 0.6011, "step": 15618 }, { "epoch": 1.0, "grad_norm": 1.4918758087606174, "learning_rate": 1.7191158563356626e-12, "loss": 0.7288, "step": 15619 }, { "epoch": 1.0, "grad_norm": 1.6918619913317596, "learning_rate": 9.670026934749388e-13, "loss": 0.7195, "step": 15620 }, { "epoch": 1.0, "grad_norm": 1.5210915960816564, "learning_rate": 4.2977898240259554e-13, "loss": 0.6447, "step": 15621 }, { "epoch": 1.0, "grad_norm": 1.7206404036551763, "learning_rate": 1.0744474698842767e-13, "loss": 0.6213, "step": 15622 }, { "epoch": 1.0, "grad_norm": 1.5421527296060364, "learning_rate": 0.0, "loss": 0.616, "step": 15623 }, { "epoch": 1.0, "step": 15623, "total_flos": 3372871092985856.0, "train_loss": 0.7009039495693754, "train_runtime": 60699.7504, "train_samples_per_second": 16.473, "train_steps_per_second": 0.257 } ], "logging_steps": 1.0, "max_steps": 15623, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 3372871092985856.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }