{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 14205, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.039774727208729e-05, "grad_norm": 327.84918212890625, "learning_rate": 3.278688524590164e-08, "loss": 3.1585, "step": 1 }, { "epoch": 0.00014079549454417458, "grad_norm": 324.16375732421875, "learning_rate": 6.557377049180328e-08, "loss": 3.0366, "step": 2 }, { "epoch": 0.00021119324181626187, "grad_norm": 310.46563720703125, "learning_rate": 9.836065573770492e-08, "loss": 2.8635, "step": 3 }, { "epoch": 0.00028159098908834917, "grad_norm": 307.29510498046875, "learning_rate": 1.3114754098360656e-07, "loss": 3.0714, "step": 4 }, { "epoch": 0.00035198873636043646, "grad_norm": 289.9696044921875, "learning_rate": 1.639344262295082e-07, "loss": 2.8635, "step": 5 }, { "epoch": 0.00042238648363252375, "grad_norm": 277.79486083984375, "learning_rate": 1.9672131147540984e-07, "loss": 3.0361, "step": 6 }, { "epoch": 0.0004927842309046111, "grad_norm": 325.2139587402344, "learning_rate": 2.295081967213115e-07, "loss": 2.984, "step": 7 }, { "epoch": 0.0005631819781766983, "grad_norm": 327.0364990234375, "learning_rate": 2.622950819672131e-07, "loss": 2.8488, "step": 8 }, { "epoch": 0.0006335797254487857, "grad_norm": 276.72918701171875, "learning_rate": 2.950819672131147e-07, "loss": 2.751, "step": 9 }, { "epoch": 0.0007039774727208729, "grad_norm": 292.3111267089844, "learning_rate": 3.278688524590164e-07, "loss": 2.7861, "step": 10 }, { "epoch": 0.0007743752199929603, "grad_norm": 243.3485107421875, "learning_rate": 3.60655737704918e-07, "loss": 2.8307, "step": 11 }, { "epoch": 0.0008447729672650475, "grad_norm": 371.9922180175781, "learning_rate": 3.9344262295081967e-07, "loss": 2.9013, "step": 12 }, { "epoch": 0.0009151707145371348, "grad_norm": 308.1575927734375, "learning_rate": 4.2622950819672127e-07, "loss": 2.8596, "step": 13 }, { "epoch": 0.0009855684618092222, "grad_norm": 298.6134033203125, "learning_rate": 4.59016393442623e-07, "loss": 2.8469, "step": 14 }, { "epoch": 0.0010559662090813093, "grad_norm": 265.7218322753906, "learning_rate": 4.918032786885245e-07, "loss": 2.7987, "step": 15 }, { "epoch": 0.0011263639563533967, "grad_norm": 264.1418151855469, "learning_rate": 5.245901639344262e-07, "loss": 2.6814, "step": 16 }, { "epoch": 0.001196761703625484, "grad_norm": 198.11727905273438, "learning_rate": 5.573770491803279e-07, "loss": 2.4296, "step": 17 }, { "epoch": 0.0012671594508975714, "grad_norm": 207.57676696777344, "learning_rate": 5.901639344262294e-07, "loss": 2.2371, "step": 18 }, { "epoch": 0.0013375571981696585, "grad_norm": 159.0576629638672, "learning_rate": 6.229508196721311e-07, "loss": 2.3463, "step": 19 }, { "epoch": 0.0014079549454417458, "grad_norm": 173.71910095214844, "learning_rate": 6.557377049180328e-07, "loss": 2.2895, "step": 20 }, { "epoch": 0.0014783526927138332, "grad_norm": 184.00210571289062, "learning_rate": 6.885245901639344e-07, "loss": 2.2514, "step": 21 }, { "epoch": 0.0015487504399859205, "grad_norm": 165.90907287597656, "learning_rate": 7.21311475409836e-07, "loss": 2.2558, "step": 22 }, { "epoch": 0.0016191481872580077, "grad_norm": 148.08236694335938, "learning_rate": 7.540983606557376e-07, "loss": 2.2918, "step": 23 }, { "epoch": 0.001689545934530095, "grad_norm": 72.54096221923828, "learning_rate": 7.868852459016393e-07, "loss": 1.794, "step": 24 }, { "epoch": 0.0017599436818021823, "grad_norm": 69.06537628173828, "learning_rate": 8.19672131147541e-07, "loss": 1.9379, "step": 25 }, { "epoch": 0.0018303414290742697, "grad_norm": 49.08950424194336, "learning_rate": 8.524590163934425e-07, "loss": 1.9297, "step": 26 }, { "epoch": 0.0019007391763463568, "grad_norm": 52.08783721923828, "learning_rate": 8.852459016393443e-07, "loss": 1.7796, "step": 27 }, { "epoch": 0.0019711369236184444, "grad_norm": 63.28836441040039, "learning_rate": 9.18032786885246e-07, "loss": 1.8632, "step": 28 }, { "epoch": 0.0020415346708905313, "grad_norm": 47.32307052612305, "learning_rate": 9.508196721311477e-07, "loss": 1.8497, "step": 29 }, { "epoch": 0.0021119324181626186, "grad_norm": 63.913455963134766, "learning_rate": 9.83606557377049e-07, "loss": 1.6733, "step": 30 }, { "epoch": 0.002182330165434706, "grad_norm": 36.255428314208984, "learning_rate": 1.0163934426229509e-06, "loss": 1.672, "step": 31 }, { "epoch": 0.0022527279127067933, "grad_norm": 33.048282623291016, "learning_rate": 1.0491803278688525e-06, "loss": 1.6959, "step": 32 }, { "epoch": 0.0023231256599788807, "grad_norm": 28.955873489379883, "learning_rate": 1.081967213114754e-06, "loss": 1.7657, "step": 33 }, { "epoch": 0.002393523407250968, "grad_norm": 36.223793029785156, "learning_rate": 1.1147540983606559e-06, "loss": 1.6015, "step": 34 }, { "epoch": 0.0024639211545230554, "grad_norm": 33.806453704833984, "learning_rate": 1.1475409836065573e-06, "loss": 1.6152, "step": 35 }, { "epoch": 0.0025343189017951427, "grad_norm": 21.79129981994629, "learning_rate": 1.1803278688524589e-06, "loss": 1.3907, "step": 36 }, { "epoch": 0.00260471664906723, "grad_norm": 17.227123260498047, "learning_rate": 1.2131147540983607e-06, "loss": 1.4542, "step": 37 }, { "epoch": 0.002675114396339317, "grad_norm": 21.50402069091797, "learning_rate": 1.2459016393442623e-06, "loss": 1.3454, "step": 38 }, { "epoch": 0.0027455121436114043, "grad_norm": 21.000303268432617, "learning_rate": 1.2786885245901639e-06, "loss": 1.4508, "step": 39 }, { "epoch": 0.0028159098908834917, "grad_norm": 28.696334838867188, "learning_rate": 1.3114754098360657e-06, "loss": 1.4662, "step": 40 }, { "epoch": 0.002886307638155579, "grad_norm": 29.718914031982422, "learning_rate": 1.344262295081967e-06, "loss": 1.3502, "step": 41 }, { "epoch": 0.0029567053854276664, "grad_norm": 17.292678833007812, "learning_rate": 1.3770491803278689e-06, "loss": 1.4447, "step": 42 }, { "epoch": 0.0030271031326997537, "grad_norm": 18.584470748901367, "learning_rate": 1.4098360655737705e-06, "loss": 1.3633, "step": 43 }, { "epoch": 0.003097500879971841, "grad_norm": 12.572699546813965, "learning_rate": 1.442622950819672e-06, "loss": 1.2521, "step": 44 }, { "epoch": 0.0031678986272439284, "grad_norm": 13.638853073120117, "learning_rate": 1.4754098360655739e-06, "loss": 1.2253, "step": 45 }, { "epoch": 0.0032382963745160153, "grad_norm": 16.544931411743164, "learning_rate": 1.5081967213114753e-06, "loss": 1.3009, "step": 46 }, { "epoch": 0.0033086941217881027, "grad_norm": 54.511104583740234, "learning_rate": 1.540983606557377e-06, "loss": 1.248, "step": 47 }, { "epoch": 0.00337909186906019, "grad_norm": 10.137621879577637, "learning_rate": 1.5737704918032787e-06, "loss": 1.2674, "step": 48 }, { "epoch": 0.0034494896163322773, "grad_norm": 16.869211196899414, "learning_rate": 1.6065573770491803e-06, "loss": 1.3589, "step": 49 }, { "epoch": 0.0035198873636043647, "grad_norm": 10.89695930480957, "learning_rate": 1.639344262295082e-06, "loss": 1.2846, "step": 50 }, { "epoch": 0.003590285110876452, "grad_norm": 23.282859802246094, "learning_rate": 1.6721311475409835e-06, "loss": 1.2672, "step": 51 }, { "epoch": 0.0036606828581485394, "grad_norm": 12.657679557800293, "learning_rate": 1.704918032786885e-06, "loss": 1.2303, "step": 52 }, { "epoch": 0.0037310806054206267, "grad_norm": 11.95226764678955, "learning_rate": 1.737704918032787e-06, "loss": 1.1187, "step": 53 }, { "epoch": 0.0038014783526927136, "grad_norm": 9.12307357788086, "learning_rate": 1.7704918032786885e-06, "loss": 1.2917, "step": 54 }, { "epoch": 0.003871876099964801, "grad_norm": 11.277148246765137, "learning_rate": 1.80327868852459e-06, "loss": 1.2548, "step": 55 }, { "epoch": 0.003942273847236889, "grad_norm": 9.948272705078125, "learning_rate": 1.836065573770492e-06, "loss": 1.3304, "step": 56 }, { "epoch": 0.004012671594508976, "grad_norm": 7.373676776885986, "learning_rate": 1.8688524590163935e-06, "loss": 1.2114, "step": 57 }, { "epoch": 0.004083069341781063, "grad_norm": 9.533021926879883, "learning_rate": 1.9016393442622953e-06, "loss": 1.2734, "step": 58 }, { "epoch": 0.00415346708905315, "grad_norm": 8.02818489074707, "learning_rate": 1.9344262295081967e-06, "loss": 1.0933, "step": 59 }, { "epoch": 0.004223864836325237, "grad_norm": 14.541219711303711, "learning_rate": 1.967213114754098e-06, "loss": 1.127, "step": 60 }, { "epoch": 0.004294262583597325, "grad_norm": 10.511273384094238, "learning_rate": 2e-06, "loss": 1.0691, "step": 61 }, { "epoch": 0.004364660330869412, "grad_norm": 10.930693626403809, "learning_rate": 2.0327868852459017e-06, "loss": 1.1183, "step": 62 }, { "epoch": 0.0044350580781415, "grad_norm": 7.912240982055664, "learning_rate": 2.065573770491803e-06, "loss": 1.1607, "step": 63 }, { "epoch": 0.004505455825413587, "grad_norm": 11.118265151977539, "learning_rate": 2.098360655737705e-06, "loss": 1.1515, "step": 64 }, { "epoch": 0.0045758535726856744, "grad_norm": 12.548416137695312, "learning_rate": 2.1311475409836067e-06, "loss": 1.2582, "step": 65 }, { "epoch": 0.004646251319957761, "grad_norm": 16.8477840423584, "learning_rate": 2.163934426229508e-06, "loss": 1.035, "step": 66 }, { "epoch": 0.004716649067229848, "grad_norm": 7.983528137207031, "learning_rate": 2.19672131147541e-06, "loss": 1.0744, "step": 67 }, { "epoch": 0.004787046814501936, "grad_norm": 8.137669563293457, "learning_rate": 2.2295081967213117e-06, "loss": 1.3031, "step": 68 }, { "epoch": 0.004857444561774023, "grad_norm": 7.361748218536377, "learning_rate": 2.262295081967213e-06, "loss": 1.1622, "step": 69 }, { "epoch": 0.004927842309046111, "grad_norm": 5.179004192352295, "learning_rate": 2.2950819672131145e-06, "loss": 0.9968, "step": 70 }, { "epoch": 0.004998240056318198, "grad_norm": 6.043445110321045, "learning_rate": 2.3278688524590163e-06, "loss": 1.0931, "step": 71 }, { "epoch": 0.005068637803590285, "grad_norm": 11.305768966674805, "learning_rate": 2.3606557377049177e-06, "loss": 1.1006, "step": 72 }, { "epoch": 0.005139035550862372, "grad_norm": 8.141422271728516, "learning_rate": 2.3934426229508195e-06, "loss": 1.0072, "step": 73 }, { "epoch": 0.00520943329813446, "grad_norm": 19.329275131225586, "learning_rate": 2.4262295081967213e-06, "loss": 1.0971, "step": 74 }, { "epoch": 0.005279831045406547, "grad_norm": 12.364167213439941, "learning_rate": 2.4590163934426227e-06, "loss": 1.1431, "step": 75 }, { "epoch": 0.005350228792678634, "grad_norm": 7.520205020904541, "learning_rate": 2.4918032786885245e-06, "loss": 1.1131, "step": 76 }, { "epoch": 0.005420626539950722, "grad_norm": 5.590424060821533, "learning_rate": 2.5245901639344264e-06, "loss": 1.0403, "step": 77 }, { "epoch": 0.005491024287222809, "grad_norm": 8.368648529052734, "learning_rate": 2.5573770491803277e-06, "loss": 1.177, "step": 78 }, { "epoch": 0.005561422034494896, "grad_norm": 34.531700134277344, "learning_rate": 2.5901639344262296e-06, "loss": 1.1472, "step": 79 }, { "epoch": 0.005631819781766983, "grad_norm": 5.946082592010498, "learning_rate": 2.6229508196721314e-06, "loss": 1.136, "step": 80 }, { "epoch": 0.005702217529039071, "grad_norm": 7.393290042877197, "learning_rate": 2.6557377049180328e-06, "loss": 1.0315, "step": 81 }, { "epoch": 0.005772615276311158, "grad_norm": 5.287744998931885, "learning_rate": 2.688524590163934e-06, "loss": 1.0025, "step": 82 }, { "epoch": 0.005843013023583245, "grad_norm": 6.3712592124938965, "learning_rate": 2.721311475409836e-06, "loss": 1.0929, "step": 83 }, { "epoch": 0.005913410770855333, "grad_norm": 8.836492538452148, "learning_rate": 2.7540983606557378e-06, "loss": 1.0585, "step": 84 }, { "epoch": 0.00598380851812742, "grad_norm": 7.777792930603027, "learning_rate": 2.786885245901639e-06, "loss": 1.0443, "step": 85 }, { "epoch": 0.006054206265399507, "grad_norm": 5.140045166015625, "learning_rate": 2.819672131147541e-06, "loss": 1.0398, "step": 86 }, { "epoch": 0.006124604012671594, "grad_norm": 4.603422164916992, "learning_rate": 2.8524590163934428e-06, "loss": 1.1361, "step": 87 }, { "epoch": 0.006195001759943682, "grad_norm": 6.970488548278809, "learning_rate": 2.885245901639344e-06, "loss": 0.9699, "step": 88 }, { "epoch": 0.006265399507215769, "grad_norm": 4.829523086547852, "learning_rate": 2.918032786885246e-06, "loss": 1.042, "step": 89 }, { "epoch": 0.006335797254487857, "grad_norm": 4.901027202606201, "learning_rate": 2.9508196721311478e-06, "loss": 1.1206, "step": 90 }, { "epoch": 0.006406195001759944, "grad_norm": 6.397693157196045, "learning_rate": 2.9836065573770487e-06, "loss": 1.1563, "step": 91 }, { "epoch": 0.006476592749032031, "grad_norm": 4.759715557098389, "learning_rate": 3.0163934426229506e-06, "loss": 1.0057, "step": 92 }, { "epoch": 0.006546990496304118, "grad_norm": 5.691864967346191, "learning_rate": 3.0491803278688524e-06, "loss": 0.9445, "step": 93 }, { "epoch": 0.006617388243576205, "grad_norm": 5.3856425285339355, "learning_rate": 3.081967213114754e-06, "loss": 1.0753, "step": 94 }, { "epoch": 0.006687785990848293, "grad_norm": 5.01511287689209, "learning_rate": 3.1147540983606556e-06, "loss": 0.9226, "step": 95 }, { "epoch": 0.00675818373812038, "grad_norm": 5.576282978057861, "learning_rate": 3.1475409836065574e-06, "loss": 0.9603, "step": 96 }, { "epoch": 0.006828581485392468, "grad_norm": 4.554950714111328, "learning_rate": 3.180327868852459e-06, "loss": 1.1536, "step": 97 }, { "epoch": 0.006898979232664555, "grad_norm": 6.626781463623047, "learning_rate": 3.2131147540983606e-06, "loss": 1.0018, "step": 98 }, { "epoch": 0.006969376979936642, "grad_norm": 4.615559101104736, "learning_rate": 3.2459016393442624e-06, "loss": 1.1572, "step": 99 }, { "epoch": 0.007039774727208729, "grad_norm": 5.9900126457214355, "learning_rate": 3.278688524590164e-06, "loss": 0.9231, "step": 100 }, { "epoch": 0.007110172474480816, "grad_norm": 5.481687068939209, "learning_rate": 3.3114754098360656e-06, "loss": 1.0406, "step": 101 }, { "epoch": 0.007180570221752904, "grad_norm": 4.90478515625, "learning_rate": 3.344262295081967e-06, "loss": 1.1233, "step": 102 }, { "epoch": 0.007250967969024991, "grad_norm": 5.622109889984131, "learning_rate": 3.377049180327869e-06, "loss": 1.0167, "step": 103 }, { "epoch": 0.007321365716297079, "grad_norm": 7.457308769226074, "learning_rate": 3.40983606557377e-06, "loss": 0.8555, "step": 104 }, { "epoch": 0.007391763463569166, "grad_norm": 7.173100471496582, "learning_rate": 3.442622950819672e-06, "loss": 1.0897, "step": 105 }, { "epoch": 0.0074621612108412535, "grad_norm": 5.910191535949707, "learning_rate": 3.475409836065574e-06, "loss": 1.0607, "step": 106 }, { "epoch": 0.00753255895811334, "grad_norm": 6.937999725341797, "learning_rate": 3.508196721311475e-06, "loss": 1.0604, "step": 107 }, { "epoch": 0.007602956705385427, "grad_norm": 5.2612385749816895, "learning_rate": 3.540983606557377e-06, "loss": 0.9314, "step": 108 }, { "epoch": 0.007673354452657515, "grad_norm": 5.505253791809082, "learning_rate": 3.573770491803279e-06, "loss": 0.8598, "step": 109 }, { "epoch": 0.007743752199929602, "grad_norm": 4.654024124145508, "learning_rate": 3.60655737704918e-06, "loss": 1.0895, "step": 110 }, { "epoch": 0.00781414994720169, "grad_norm": 4.011298179626465, "learning_rate": 3.639344262295082e-06, "loss": 0.9924, "step": 111 }, { "epoch": 0.007884547694473778, "grad_norm": 6.011845111846924, "learning_rate": 3.672131147540984e-06, "loss": 0.9868, "step": 112 }, { "epoch": 0.007954945441745864, "grad_norm": 4.440524578094482, "learning_rate": 3.704918032786885e-06, "loss": 0.9508, "step": 113 }, { "epoch": 0.008025343189017951, "grad_norm": 4.9957966804504395, "learning_rate": 3.737704918032787e-06, "loss": 0.9013, "step": 114 }, { "epoch": 0.008095740936290039, "grad_norm": 5.244853496551514, "learning_rate": 3.770491803278689e-06, "loss": 0.9226, "step": 115 }, { "epoch": 0.008166138683562125, "grad_norm": 5.495327949523926, "learning_rate": 3.8032786885245906e-06, "loss": 1.0457, "step": 116 }, { "epoch": 0.008236536430834213, "grad_norm": 4.33501672744751, "learning_rate": 3.8360655737704925e-06, "loss": 0.9441, "step": 117 }, { "epoch": 0.0083069341781063, "grad_norm": 4.686830997467041, "learning_rate": 3.868852459016393e-06, "loss": 0.912, "step": 118 }, { "epoch": 0.008377331925378389, "grad_norm": 7.1909708976745605, "learning_rate": 3.901639344262294e-06, "loss": 1.0878, "step": 119 }, { "epoch": 0.008447729672650475, "grad_norm": 4.669498443603516, "learning_rate": 3.934426229508196e-06, "loss": 1.0657, "step": 120 }, { "epoch": 0.008518127419922562, "grad_norm": 5.245838165283203, "learning_rate": 3.967213114754098e-06, "loss": 1.0839, "step": 121 }, { "epoch": 0.00858852516719465, "grad_norm": 4.338572025299072, "learning_rate": 4e-06, "loss": 0.9384, "step": 122 }, { "epoch": 0.008658922914466738, "grad_norm": 5.470822811126709, "learning_rate": 4.032786885245902e-06, "loss": 1.0732, "step": 123 }, { "epoch": 0.008729320661738824, "grad_norm": 4.264244079589844, "learning_rate": 4.0655737704918034e-06, "loss": 1.0364, "step": 124 }, { "epoch": 0.008799718409010912, "grad_norm": 4.813968181610107, "learning_rate": 4.098360655737704e-06, "loss": 1.0919, "step": 125 }, { "epoch": 0.008870116156283, "grad_norm": 5.334741592407227, "learning_rate": 4.131147540983606e-06, "loss": 1.1235, "step": 126 }, { "epoch": 0.008940513903555086, "grad_norm": 5.502646446228027, "learning_rate": 4.163934426229508e-06, "loss": 0.9513, "step": 127 }, { "epoch": 0.009010911650827173, "grad_norm": 5.643490791320801, "learning_rate": 4.19672131147541e-06, "loss": 1.0333, "step": 128 }, { "epoch": 0.009081309398099261, "grad_norm": 6.624214172363281, "learning_rate": 4.229508196721312e-06, "loss": 0.9192, "step": 129 }, { "epoch": 0.009151707145371349, "grad_norm": 4.946504592895508, "learning_rate": 4.2622950819672135e-06, "loss": 1.0762, "step": 130 }, { "epoch": 0.009222104892643435, "grad_norm": 4.4725871086120605, "learning_rate": 4.2950819672131144e-06, "loss": 1.0471, "step": 131 }, { "epoch": 0.009292502639915523, "grad_norm": 7.486828327178955, "learning_rate": 4.327868852459016e-06, "loss": 1.0653, "step": 132 }, { "epoch": 0.00936290038718761, "grad_norm": 9.454997062683105, "learning_rate": 4.360655737704918e-06, "loss": 1.0513, "step": 133 }, { "epoch": 0.009433298134459697, "grad_norm": 5.494566917419434, "learning_rate": 4.39344262295082e-06, "loss": 0.9125, "step": 134 }, { "epoch": 0.009503695881731784, "grad_norm": 5.223452091217041, "learning_rate": 4.426229508196722e-06, "loss": 1.1218, "step": 135 }, { "epoch": 0.009574093629003872, "grad_norm": 4.113816261291504, "learning_rate": 4.4590163934426235e-06, "loss": 0.9365, "step": 136 }, { "epoch": 0.00964449137627596, "grad_norm": 4.752868175506592, "learning_rate": 4.4918032786885244e-06, "loss": 0.9606, "step": 137 }, { "epoch": 0.009714889123548046, "grad_norm": 4.174314498901367, "learning_rate": 4.524590163934426e-06, "loss": 0.8709, "step": 138 }, { "epoch": 0.009785286870820134, "grad_norm": 4.687844753265381, "learning_rate": 4.557377049180327e-06, "loss": 1.1115, "step": 139 }, { "epoch": 0.009855684618092221, "grad_norm": 4.23372745513916, "learning_rate": 4.590163934426229e-06, "loss": 1.0595, "step": 140 }, { "epoch": 0.009926082365364308, "grad_norm": 4.5904459953308105, "learning_rate": 4.622950819672131e-06, "loss": 1.0146, "step": 141 }, { "epoch": 0.009996480112636395, "grad_norm": 3.6803858280181885, "learning_rate": 4.655737704918033e-06, "loss": 0.9218, "step": 142 }, { "epoch": 0.010066877859908483, "grad_norm": 4.220322132110596, "learning_rate": 4.6885245901639345e-06, "loss": 0.9692, "step": 143 }, { "epoch": 0.01013727560718057, "grad_norm": 5.108364105224609, "learning_rate": 4.7213114754098354e-06, "loss": 1.091, "step": 144 }, { "epoch": 0.010207673354452657, "grad_norm": 5.284567356109619, "learning_rate": 4.754098360655737e-06, "loss": 0.944, "step": 145 }, { "epoch": 0.010278071101724745, "grad_norm": 3.9610142707824707, "learning_rate": 4.786885245901639e-06, "loss": 1.1799, "step": 146 }, { "epoch": 0.010348468848996832, "grad_norm": 5.0468597412109375, "learning_rate": 4.819672131147541e-06, "loss": 1.0112, "step": 147 }, { "epoch": 0.01041886659626892, "grad_norm": 6.582139492034912, "learning_rate": 4.852459016393443e-06, "loss": 0.9559, "step": 148 }, { "epoch": 0.010489264343541006, "grad_norm": 3.8485188484191895, "learning_rate": 4.8852459016393445e-06, "loss": 0.9447, "step": 149 }, { "epoch": 0.010559662090813094, "grad_norm": 4.572494029998779, "learning_rate": 4.9180327868852455e-06, "loss": 1.0548, "step": 150 }, { "epoch": 0.010630059838085182, "grad_norm": 4.870029449462891, "learning_rate": 4.950819672131147e-06, "loss": 1.0466, "step": 151 }, { "epoch": 0.010700457585357268, "grad_norm": 4.742334842681885, "learning_rate": 4.983606557377049e-06, "loss": 0.9156, "step": 152 }, { "epoch": 0.010770855332629356, "grad_norm": 4.570158004760742, "learning_rate": 5.016393442622951e-06, "loss": 0.8503, "step": 153 }, { "epoch": 0.010841253079901443, "grad_norm": 4.887471675872803, "learning_rate": 5.049180327868853e-06, "loss": 0.9545, "step": 154 }, { "epoch": 0.010911650827173531, "grad_norm": 4.630635738372803, "learning_rate": 5.0819672131147545e-06, "loss": 0.8893, "step": 155 }, { "epoch": 0.010982048574445617, "grad_norm": 4.824710845947266, "learning_rate": 5.1147540983606555e-06, "loss": 0.8591, "step": 156 }, { "epoch": 0.011052446321717705, "grad_norm": 3.679910659790039, "learning_rate": 5.147540983606557e-06, "loss": 0.9802, "step": 157 }, { "epoch": 0.011122844068989793, "grad_norm": 5.713979244232178, "learning_rate": 5.180327868852459e-06, "loss": 0.989, "step": 158 }, { "epoch": 0.011193241816261879, "grad_norm": 4.592340469360352, "learning_rate": 5.213114754098361e-06, "loss": 0.9028, "step": 159 }, { "epoch": 0.011263639563533967, "grad_norm": 3.9298043251037598, "learning_rate": 5.245901639344263e-06, "loss": 1.0798, "step": 160 }, { "epoch": 0.011334037310806054, "grad_norm": 3.508730888366699, "learning_rate": 5.278688524590164e-06, "loss": 0.9157, "step": 161 }, { "epoch": 0.011404435058078142, "grad_norm": 5.089319705963135, "learning_rate": 5.3114754098360655e-06, "loss": 0.9711, "step": 162 }, { "epoch": 0.011474832805350228, "grad_norm": 4.619134426116943, "learning_rate": 5.3442622950819665e-06, "loss": 0.9138, "step": 163 }, { "epoch": 0.011545230552622316, "grad_norm": 6.311204433441162, "learning_rate": 5.377049180327868e-06, "loss": 0.9143, "step": 164 }, { "epoch": 0.011615628299894404, "grad_norm": 4.683346271514893, "learning_rate": 5.40983606557377e-06, "loss": 0.9999, "step": 165 }, { "epoch": 0.01168602604716649, "grad_norm": 5.870517253875732, "learning_rate": 5.442622950819672e-06, "loss": 0.8818, "step": 166 }, { "epoch": 0.011756423794438578, "grad_norm": 5.734021186828613, "learning_rate": 5.475409836065574e-06, "loss": 0.9859, "step": 167 }, { "epoch": 0.011826821541710665, "grad_norm": 4.360768795013428, "learning_rate": 5.5081967213114755e-06, "loss": 0.9062, "step": 168 }, { "epoch": 0.011897219288982753, "grad_norm": 4.667993068695068, "learning_rate": 5.5409836065573765e-06, "loss": 0.9653, "step": 169 }, { "epoch": 0.01196761703625484, "grad_norm": 4.329316139221191, "learning_rate": 5.573770491803278e-06, "loss": 0.9568, "step": 170 }, { "epoch": 0.012038014783526927, "grad_norm": 4.864582538604736, "learning_rate": 5.60655737704918e-06, "loss": 0.9355, "step": 171 }, { "epoch": 0.012108412530799015, "grad_norm": 4.687588691711426, "learning_rate": 5.639344262295082e-06, "loss": 1.0508, "step": 172 }, { "epoch": 0.012178810278071103, "grad_norm": 4.791179656982422, "learning_rate": 5.672131147540984e-06, "loss": 0.8908, "step": 173 }, { "epoch": 0.012249208025343189, "grad_norm": 4.379640579223633, "learning_rate": 5.7049180327868855e-06, "loss": 0.8989, "step": 174 }, { "epoch": 0.012319605772615276, "grad_norm": 4.857367038726807, "learning_rate": 5.737704918032787e-06, "loss": 0.9819, "step": 175 }, { "epoch": 0.012390003519887364, "grad_norm": 3.231602668762207, "learning_rate": 5.770491803278688e-06, "loss": 0.9106, "step": 176 }, { "epoch": 0.01246040126715945, "grad_norm": 5.948474407196045, "learning_rate": 5.80327868852459e-06, "loss": 0.9429, "step": 177 }, { "epoch": 0.012530799014431538, "grad_norm": 3.734452724456787, "learning_rate": 5.836065573770492e-06, "loss": 0.9505, "step": 178 }, { "epoch": 0.012601196761703626, "grad_norm": 4.1793131828308105, "learning_rate": 5.868852459016394e-06, "loss": 0.8195, "step": 179 }, { "epoch": 0.012671594508975714, "grad_norm": 4.072559356689453, "learning_rate": 5.9016393442622956e-06, "loss": 1.0033, "step": 180 }, { "epoch": 0.0127419922562478, "grad_norm": 4.6122565269470215, "learning_rate": 5.934426229508197e-06, "loss": 0.881, "step": 181 }, { "epoch": 0.012812390003519887, "grad_norm": 4.698636531829834, "learning_rate": 5.9672131147540975e-06, "loss": 0.8632, "step": 182 }, { "epoch": 0.012882787750791975, "grad_norm": 4.806349277496338, "learning_rate": 5.999999999999999e-06, "loss": 0.8217, "step": 183 }, { "epoch": 0.012953185498064061, "grad_norm": 3.4926156997680664, "learning_rate": 6.032786885245901e-06, "loss": 0.9821, "step": 184 }, { "epoch": 0.013023583245336149, "grad_norm": 5.424435615539551, "learning_rate": 6.065573770491803e-06, "loss": 0.924, "step": 185 }, { "epoch": 0.013093980992608237, "grad_norm": 3.5752687454223633, "learning_rate": 6.098360655737705e-06, "loss": 0.9442, "step": 186 }, { "epoch": 0.013164378739880325, "grad_norm": 3.6515116691589355, "learning_rate": 6.1311475409836066e-06, "loss": 0.8173, "step": 187 }, { "epoch": 0.01323477648715241, "grad_norm": 6.96682596206665, "learning_rate": 6.163934426229508e-06, "loss": 0.8514, "step": 188 }, { "epoch": 0.013305174234424498, "grad_norm": 3.709293842315674, "learning_rate": 6.196721311475409e-06, "loss": 0.9514, "step": 189 }, { "epoch": 0.013375571981696586, "grad_norm": 3.450347423553467, "learning_rate": 6.229508196721311e-06, "loss": 1.0049, "step": 190 }, { "epoch": 0.013445969728968672, "grad_norm": 3.825087785720825, "learning_rate": 6.262295081967213e-06, "loss": 0.9766, "step": 191 }, { "epoch": 0.01351636747624076, "grad_norm": 3.5818939208984375, "learning_rate": 6.295081967213115e-06, "loss": 1.0047, "step": 192 }, { "epoch": 0.013586765223512848, "grad_norm": 5.840765476226807, "learning_rate": 6.3278688524590166e-06, "loss": 0.9427, "step": 193 }, { "epoch": 0.013657162970784936, "grad_norm": 4.033573150634766, "learning_rate": 6.360655737704918e-06, "loss": 1.0539, "step": 194 }, { "epoch": 0.013727560718057022, "grad_norm": 6.832907199859619, "learning_rate": 6.393442622950819e-06, "loss": 0.9193, "step": 195 }, { "epoch": 0.01379795846532911, "grad_norm": 4.365390300750732, "learning_rate": 6.426229508196721e-06, "loss": 1.1125, "step": 196 }, { "epoch": 0.013868356212601197, "grad_norm": 3.70200514793396, "learning_rate": 6.459016393442623e-06, "loss": 0.9829, "step": 197 }, { "epoch": 0.013938753959873283, "grad_norm": 12.645018577575684, "learning_rate": 6.491803278688525e-06, "loss": 0.8635, "step": 198 }, { "epoch": 0.014009151707145371, "grad_norm": 3.4400744438171387, "learning_rate": 6.524590163934427e-06, "loss": 0.9068, "step": 199 }, { "epoch": 0.014079549454417459, "grad_norm": 3.9763290882110596, "learning_rate": 6.557377049180328e-06, "loss": 0.8948, "step": 200 }, { "epoch": 0.014149947201689547, "grad_norm": 4.3906121253967285, "learning_rate": 6.590163934426229e-06, "loss": 0.89, "step": 201 }, { "epoch": 0.014220344948961633, "grad_norm": 3.3688299655914307, "learning_rate": 6.622950819672131e-06, "loss": 0.8262, "step": 202 }, { "epoch": 0.01429074269623372, "grad_norm": 3.9636080265045166, "learning_rate": 6.655737704918032e-06, "loss": 1.0216, "step": 203 }, { "epoch": 0.014361140443505808, "grad_norm": 3.3210840225219727, "learning_rate": 6.688524590163934e-06, "loss": 0.8529, "step": 204 }, { "epoch": 0.014431538190777896, "grad_norm": 3.6048660278320312, "learning_rate": 6.721311475409836e-06, "loss": 0.9815, "step": 205 }, { "epoch": 0.014501935938049982, "grad_norm": 4.036753177642822, "learning_rate": 6.754098360655738e-06, "loss": 0.9306, "step": 206 }, { "epoch": 0.01457233368532207, "grad_norm": 6.482748985290527, "learning_rate": 6.786885245901639e-06, "loss": 0.8143, "step": 207 }, { "epoch": 0.014642731432594158, "grad_norm": 3.839012384414673, "learning_rate": 6.81967213114754e-06, "loss": 0.8622, "step": 208 }, { "epoch": 0.014713129179866244, "grad_norm": 3.51278018951416, "learning_rate": 6.852459016393442e-06, "loss": 0.7603, "step": 209 }, { "epoch": 0.014783526927138331, "grad_norm": 3.8590409755706787, "learning_rate": 6.885245901639344e-06, "loss": 0.9236, "step": 210 }, { "epoch": 0.014853924674410419, "grad_norm": 4.040726184844971, "learning_rate": 6.918032786885246e-06, "loss": 0.867, "step": 211 }, { "epoch": 0.014924322421682507, "grad_norm": 3.7332282066345215, "learning_rate": 6.950819672131148e-06, "loss": 0.9563, "step": 212 }, { "epoch": 0.014994720168954593, "grad_norm": 4.816858291625977, "learning_rate": 6.983606557377049e-06, "loss": 1.0263, "step": 213 }, { "epoch": 0.01506511791622668, "grad_norm": 3.9906628131866455, "learning_rate": 7.01639344262295e-06, "loss": 0.9658, "step": 214 }, { "epoch": 0.015135515663498769, "grad_norm": 3.294687509536743, "learning_rate": 7.049180327868852e-06, "loss": 0.9832, "step": 215 }, { "epoch": 0.015205913410770855, "grad_norm": 4.467609405517578, "learning_rate": 7.081967213114754e-06, "loss": 0.9228, "step": 216 }, { "epoch": 0.015276311158042942, "grad_norm": 3.532924175262451, "learning_rate": 7.114754098360656e-06, "loss": 0.8478, "step": 217 }, { "epoch": 0.01534670890531503, "grad_norm": 4.039727210998535, "learning_rate": 7.147540983606558e-06, "loss": 0.828, "step": 218 }, { "epoch": 0.015417106652587118, "grad_norm": 3.7719645500183105, "learning_rate": 7.1803278688524594e-06, "loss": 1.0205, "step": 219 }, { "epoch": 0.015487504399859204, "grad_norm": 6.250553607940674, "learning_rate": 7.21311475409836e-06, "loss": 0.9531, "step": 220 }, { "epoch": 0.015557902147131292, "grad_norm": 3.9546256065368652, "learning_rate": 7.245901639344262e-06, "loss": 0.8949, "step": 221 }, { "epoch": 0.01562829989440338, "grad_norm": 33.228485107421875, "learning_rate": 7.278688524590164e-06, "loss": 0.868, "step": 222 }, { "epoch": 0.015698697641675467, "grad_norm": 4.165818691253662, "learning_rate": 7.311475409836066e-06, "loss": 0.9377, "step": 223 }, { "epoch": 0.015769095388947555, "grad_norm": 3.811441659927368, "learning_rate": 7.344262295081968e-06, "loss": 1.024, "step": 224 }, { "epoch": 0.01583949313621964, "grad_norm": 11.727596282958984, "learning_rate": 7.3770491803278695e-06, "loss": 0.9551, "step": 225 }, { "epoch": 0.015909890883491727, "grad_norm": 3.6691038608551025, "learning_rate": 7.40983606557377e-06, "loss": 0.7604, "step": 226 }, { "epoch": 0.015980288630763815, "grad_norm": 4.284067153930664, "learning_rate": 7.442622950819672e-06, "loss": 0.9928, "step": 227 }, { "epoch": 0.016050686378035903, "grad_norm": 3.348663568496704, "learning_rate": 7.475409836065574e-06, "loss": 0.8041, "step": 228 }, { "epoch": 0.01612108412530799, "grad_norm": 3.5675337314605713, "learning_rate": 7.508196721311476e-06, "loss": 0.9229, "step": 229 }, { "epoch": 0.016191481872580078, "grad_norm": 3.7404117584228516, "learning_rate": 7.540983606557378e-06, "loss": 0.9307, "step": 230 }, { "epoch": 0.016261879619852166, "grad_norm": 3.531076669692993, "learning_rate": 7.5737704918032795e-06, "loss": 0.8148, "step": 231 }, { "epoch": 0.01633227736712425, "grad_norm": 5.2189435958862305, "learning_rate": 7.606557377049181e-06, "loss": 0.813, "step": 232 }, { "epoch": 0.016402675114396338, "grad_norm": 9.95646858215332, "learning_rate": 7.639344262295082e-06, "loss": 0.7263, "step": 233 }, { "epoch": 0.016473072861668426, "grad_norm": 4.791502475738525, "learning_rate": 7.672131147540985e-06, "loss": 0.8592, "step": 234 }, { "epoch": 0.016543470608940514, "grad_norm": 3.484861135482788, "learning_rate": 7.704918032786884e-06, "loss": 0.8775, "step": 235 }, { "epoch": 0.0166138683562126, "grad_norm": 3.6990580558776855, "learning_rate": 7.737704918032787e-06, "loss": 0.9638, "step": 236 }, { "epoch": 0.01668426610348469, "grad_norm": 4.778743743896484, "learning_rate": 7.770491803278688e-06, "loss": 0.921, "step": 237 }, { "epoch": 0.016754663850756777, "grad_norm": 3.448641061782837, "learning_rate": 7.803278688524589e-06, "loss": 0.9816, "step": 238 }, { "epoch": 0.016825061598028865, "grad_norm": 4.0377888679504395, "learning_rate": 7.836065573770491e-06, "loss": 0.8806, "step": 239 }, { "epoch": 0.01689545934530095, "grad_norm": 5.805827617645264, "learning_rate": 7.868852459016392e-06, "loss": 0.9271, "step": 240 }, { "epoch": 0.016965857092573037, "grad_norm": 3.366772174835205, "learning_rate": 7.901639344262295e-06, "loss": 0.9154, "step": 241 }, { "epoch": 0.017036254839845125, "grad_norm": 4.433914661407471, "learning_rate": 7.934426229508196e-06, "loss": 0.8839, "step": 242 }, { "epoch": 0.017106652587117212, "grad_norm": 3.0600433349609375, "learning_rate": 7.967213114754097e-06, "loss": 1.113, "step": 243 }, { "epoch": 0.0171770503343893, "grad_norm": 3.755241632461548, "learning_rate": 8e-06, "loss": 0.9123, "step": 244 }, { "epoch": 0.017247448081661388, "grad_norm": 4.450098514556885, "learning_rate": 8.0327868852459e-06, "loss": 0.8917, "step": 245 }, { "epoch": 0.017317845828933476, "grad_norm": 3.9600069522857666, "learning_rate": 8.065573770491803e-06, "loss": 0.8284, "step": 246 }, { "epoch": 0.01738824357620556, "grad_norm": 3.405292510986328, "learning_rate": 8.098360655737704e-06, "loss": 0.8444, "step": 247 }, { "epoch": 0.017458641323477648, "grad_norm": 5.175904273986816, "learning_rate": 8.131147540983607e-06, "loss": 0.9082, "step": 248 }, { "epoch": 0.017529039070749736, "grad_norm": 3.744906187057495, "learning_rate": 8.163934426229508e-06, "loss": 0.9623, "step": 249 }, { "epoch": 0.017599436818021823, "grad_norm": 4.443304061889648, "learning_rate": 8.196721311475409e-06, "loss": 0.9636, "step": 250 }, { "epoch": 0.01766983456529391, "grad_norm": 4.138010501861572, "learning_rate": 8.229508196721311e-06, "loss": 0.99, "step": 251 }, { "epoch": 0.017740232312566, "grad_norm": 3.523056983947754, "learning_rate": 8.262295081967212e-06, "loss": 0.7728, "step": 252 }, { "epoch": 0.017810630059838087, "grad_norm": 3.575166940689087, "learning_rate": 8.295081967213115e-06, "loss": 0.8997, "step": 253 }, { "epoch": 0.01788102780711017, "grad_norm": 4.717628479003906, "learning_rate": 8.327868852459016e-06, "loss": 0.8844, "step": 254 }, { "epoch": 0.01795142555438226, "grad_norm": 4.759855270385742, "learning_rate": 8.360655737704917e-06, "loss": 0.8421, "step": 255 }, { "epoch": 0.018021823301654347, "grad_norm": 3.1284937858581543, "learning_rate": 8.39344262295082e-06, "loss": 0.7817, "step": 256 }, { "epoch": 0.018092221048926434, "grad_norm": 4.886071681976318, "learning_rate": 8.42622950819672e-06, "loss": 0.9239, "step": 257 }, { "epoch": 0.018162618796198522, "grad_norm": 5.257235527038574, "learning_rate": 8.459016393442623e-06, "loss": 0.8524, "step": 258 }, { "epoch": 0.01823301654347061, "grad_norm": 3.9936141967773438, "learning_rate": 8.491803278688524e-06, "loss": 1.1224, "step": 259 }, { "epoch": 0.018303414290742698, "grad_norm": 4.840516567230225, "learning_rate": 8.524590163934427e-06, "loss": 0.8795, "step": 260 }, { "epoch": 0.018373812038014782, "grad_norm": 4.416502952575684, "learning_rate": 8.557377049180328e-06, "loss": 0.9806, "step": 261 }, { "epoch": 0.01844420978528687, "grad_norm": 3.600187063217163, "learning_rate": 8.590163934426229e-06, "loss": 0.7993, "step": 262 }, { "epoch": 0.018514607532558958, "grad_norm": 6.5911149978637695, "learning_rate": 8.622950819672132e-06, "loss": 0.8129, "step": 263 }, { "epoch": 0.018585005279831045, "grad_norm": 3.085052490234375, "learning_rate": 8.655737704918032e-06, "loss": 1.0117, "step": 264 }, { "epoch": 0.018655403027103133, "grad_norm": 3.9560866355895996, "learning_rate": 8.688524590163935e-06, "loss": 0.9242, "step": 265 }, { "epoch": 0.01872580077437522, "grad_norm": 3.763780117034912, "learning_rate": 8.721311475409836e-06, "loss": 0.8646, "step": 266 }, { "epoch": 0.01879619852164731, "grad_norm": 5.71022891998291, "learning_rate": 8.754098360655739e-06, "loss": 0.9488, "step": 267 }, { "epoch": 0.018866596268919393, "grad_norm": 3.3141403198242188, "learning_rate": 8.78688524590164e-06, "loss": 0.9574, "step": 268 }, { "epoch": 0.01893699401619148, "grad_norm": 3.565404176712036, "learning_rate": 8.81967213114754e-06, "loss": 0.9155, "step": 269 }, { "epoch": 0.01900739176346357, "grad_norm": 3.527479648590088, "learning_rate": 8.852459016393443e-06, "loss": 0.8826, "step": 270 }, { "epoch": 0.019077789510735656, "grad_norm": 4.499534606933594, "learning_rate": 8.885245901639344e-06, "loss": 0.8084, "step": 271 }, { "epoch": 0.019148187258007744, "grad_norm": 4.082296371459961, "learning_rate": 8.918032786885247e-06, "loss": 0.8621, "step": 272 }, { "epoch": 0.019218585005279832, "grad_norm": 3.5235462188720703, "learning_rate": 8.950819672131148e-06, "loss": 0.7596, "step": 273 }, { "epoch": 0.01928898275255192, "grad_norm": 4.5780110359191895, "learning_rate": 8.983606557377049e-06, "loss": 0.9243, "step": 274 }, { "epoch": 0.019359380499824004, "grad_norm": 5.595239162445068, "learning_rate": 9.016393442622952e-06, "loss": 0.968, "step": 275 }, { "epoch": 0.019429778247096092, "grad_norm": 5.420657157897949, "learning_rate": 9.049180327868853e-06, "loss": 0.8049, "step": 276 }, { "epoch": 0.01950017599436818, "grad_norm": 3.4581263065338135, "learning_rate": 9.081967213114755e-06, "loss": 0.7713, "step": 277 }, { "epoch": 0.019570573741640267, "grad_norm": 4.157036781311035, "learning_rate": 9.114754098360654e-06, "loss": 0.9512, "step": 278 }, { "epoch": 0.019640971488912355, "grad_norm": 3.5737931728363037, "learning_rate": 9.147540983606557e-06, "loss": 0.8692, "step": 279 }, { "epoch": 0.019711369236184443, "grad_norm": 4.260402202606201, "learning_rate": 9.180327868852458e-06, "loss": 0.9243, "step": 280 }, { "epoch": 0.01978176698345653, "grad_norm": 4.510676383972168, "learning_rate": 9.213114754098359e-06, "loss": 0.7892, "step": 281 }, { "epoch": 0.019852164730728615, "grad_norm": 4.167537212371826, "learning_rate": 9.245901639344262e-06, "loss": 1.0038, "step": 282 }, { "epoch": 0.019922562478000703, "grad_norm": 3.8265209197998047, "learning_rate": 9.278688524590163e-06, "loss": 0.9305, "step": 283 }, { "epoch": 0.01999296022527279, "grad_norm": 3.835582733154297, "learning_rate": 9.311475409836065e-06, "loss": 0.9405, "step": 284 }, { "epoch": 0.02006335797254488, "grad_norm": 3.796865224838257, "learning_rate": 9.344262295081966e-06, "loss": 0.9191, "step": 285 }, { "epoch": 0.020133755719816966, "grad_norm": 5.346733570098877, "learning_rate": 9.377049180327869e-06, "loss": 1.1239, "step": 286 }, { "epoch": 0.020204153467089054, "grad_norm": 3.646650552749634, "learning_rate": 9.40983606557377e-06, "loss": 0.7792, "step": 287 }, { "epoch": 0.02027455121436114, "grad_norm": 6.542603492736816, "learning_rate": 9.442622950819671e-06, "loss": 0.839, "step": 288 }, { "epoch": 0.020344948961633226, "grad_norm": 12.629976272583008, "learning_rate": 9.475409836065574e-06, "loss": 0.8477, "step": 289 }, { "epoch": 0.020415346708905314, "grad_norm": 3.32816743850708, "learning_rate": 9.508196721311474e-06, "loss": 0.9066, "step": 290 }, { "epoch": 0.0204857444561774, "grad_norm": 4.204385757446289, "learning_rate": 9.540983606557377e-06, "loss": 0.8649, "step": 291 }, { "epoch": 0.02055614220344949, "grad_norm": 4.297039985656738, "learning_rate": 9.573770491803278e-06, "loss": 0.9547, "step": 292 }, { "epoch": 0.020626539950721577, "grad_norm": 4.393106937408447, "learning_rate": 9.60655737704918e-06, "loss": 0.8204, "step": 293 }, { "epoch": 0.020696937697993665, "grad_norm": 3.8621678352355957, "learning_rate": 9.639344262295082e-06, "loss": 0.8103, "step": 294 }, { "epoch": 0.020767335445265753, "grad_norm": 4.772561073303223, "learning_rate": 9.672131147540983e-06, "loss": 0.8503, "step": 295 }, { "epoch": 0.02083773319253784, "grad_norm": 4.562353610992432, "learning_rate": 9.704918032786885e-06, "loss": 1.0322, "step": 296 }, { "epoch": 0.020908130939809925, "grad_norm": 3.6378495693206787, "learning_rate": 9.737704918032786e-06, "loss": 0.8998, "step": 297 }, { "epoch": 0.020978528687082013, "grad_norm": 4.551540374755859, "learning_rate": 9.770491803278689e-06, "loss": 0.8743, "step": 298 }, { "epoch": 0.0210489264343541, "grad_norm": 5.512988090515137, "learning_rate": 9.80327868852459e-06, "loss": 0.9397, "step": 299 }, { "epoch": 0.021119324181626188, "grad_norm": 4.027811050415039, "learning_rate": 9.836065573770491e-06, "loss": 0.8617, "step": 300 }, { "epoch": 0.021189721928898276, "grad_norm": 6.318881511688232, "learning_rate": 9.868852459016394e-06, "loss": 0.9393, "step": 301 }, { "epoch": 0.021260119676170364, "grad_norm": 12.178510665893555, "learning_rate": 9.901639344262295e-06, "loss": 0.7923, "step": 302 }, { "epoch": 0.02133051742344245, "grad_norm": 6.0396223068237305, "learning_rate": 9.934426229508197e-06, "loss": 0.868, "step": 303 }, { "epoch": 0.021400915170714536, "grad_norm": 3.185314178466797, "learning_rate": 9.967213114754098e-06, "loss": 0.8724, "step": 304 }, { "epoch": 0.021471312917986624, "grad_norm": 3.8482203483581543, "learning_rate": 1e-05, "loss": 0.853, "step": 305 }, { "epoch": 0.02154171066525871, "grad_norm": 3.8758749961853027, "learning_rate": 1.0032786885245902e-05, "loss": 0.8002, "step": 306 }, { "epoch": 0.0216121084125308, "grad_norm": 3.7108590602874756, "learning_rate": 1.0065573770491803e-05, "loss": 0.7803, "step": 307 }, { "epoch": 0.021682506159802887, "grad_norm": 3.105503559112549, "learning_rate": 1.0098360655737705e-05, "loss": 0.9925, "step": 308 }, { "epoch": 0.021752903907074975, "grad_norm": 3.8915374279022217, "learning_rate": 1.0131147540983606e-05, "loss": 0.955, "step": 309 }, { "epoch": 0.021823301654347062, "grad_norm": 3.0151827335357666, "learning_rate": 1.0163934426229509e-05, "loss": 0.8564, "step": 310 }, { "epoch": 0.021893699401619147, "grad_norm": 3.0709047317504883, "learning_rate": 1.019672131147541e-05, "loss": 0.7904, "step": 311 }, { "epoch": 0.021964097148891235, "grad_norm": 3.9401955604553223, "learning_rate": 1.0229508196721311e-05, "loss": 0.8529, "step": 312 }, { "epoch": 0.022034494896163322, "grad_norm": 3.1581814289093018, "learning_rate": 1.0262295081967214e-05, "loss": 0.8227, "step": 313 }, { "epoch": 0.02210489264343541, "grad_norm": 4.276463031768799, "learning_rate": 1.0295081967213115e-05, "loss": 0.8545, "step": 314 }, { "epoch": 0.022175290390707498, "grad_norm": 2.8280324935913086, "learning_rate": 1.0327868852459017e-05, "loss": 0.7905, "step": 315 }, { "epoch": 0.022245688137979586, "grad_norm": 3.7594704627990723, "learning_rate": 1.0360655737704918e-05, "loss": 0.9257, "step": 316 }, { "epoch": 0.022316085885251673, "grad_norm": 3.823127508163452, "learning_rate": 1.0393442622950821e-05, "loss": 0.817, "step": 317 }, { "epoch": 0.022386483632523758, "grad_norm": 3.0090863704681396, "learning_rate": 1.0426229508196722e-05, "loss": 0.8526, "step": 318 }, { "epoch": 0.022456881379795846, "grad_norm": 8.69205379486084, "learning_rate": 1.0459016393442623e-05, "loss": 0.8943, "step": 319 }, { "epoch": 0.022527279127067933, "grad_norm": 3.4876506328582764, "learning_rate": 1.0491803278688525e-05, "loss": 0.8231, "step": 320 }, { "epoch": 0.02259767687434002, "grad_norm": 4.229576110839844, "learning_rate": 1.0524590163934425e-05, "loss": 0.9239, "step": 321 }, { "epoch": 0.02266807462161211, "grad_norm": 4.177773952484131, "learning_rate": 1.0557377049180327e-05, "loss": 0.8573, "step": 322 }, { "epoch": 0.022738472368884197, "grad_norm": 3.3738083839416504, "learning_rate": 1.0590163934426228e-05, "loss": 1.0935, "step": 323 }, { "epoch": 0.022808870116156284, "grad_norm": 4.092806816101074, "learning_rate": 1.0622950819672131e-05, "loss": 0.8651, "step": 324 }, { "epoch": 0.02287926786342837, "grad_norm": 3.2100119590759277, "learning_rate": 1.0655737704918032e-05, "loss": 0.9224, "step": 325 }, { "epoch": 0.022949665610700457, "grad_norm": 2.491509437561035, "learning_rate": 1.0688524590163933e-05, "loss": 0.8787, "step": 326 }, { "epoch": 0.023020063357972544, "grad_norm": 3.2033119201660156, "learning_rate": 1.0721311475409836e-05, "loss": 0.8584, "step": 327 }, { "epoch": 0.023090461105244632, "grad_norm": 4.005705833435059, "learning_rate": 1.0754098360655737e-05, "loss": 0.8808, "step": 328 }, { "epoch": 0.02316085885251672, "grad_norm": 4.587125301361084, "learning_rate": 1.078688524590164e-05, "loss": 0.92, "step": 329 }, { "epoch": 0.023231256599788808, "grad_norm": 3.5061874389648438, "learning_rate": 1.081967213114754e-05, "loss": 0.9886, "step": 330 }, { "epoch": 0.023301654347060895, "grad_norm": 3.692006826400757, "learning_rate": 1.0852459016393443e-05, "loss": 0.87, "step": 331 }, { "epoch": 0.02337205209433298, "grad_norm": 3.919563055038452, "learning_rate": 1.0885245901639344e-05, "loss": 0.7871, "step": 332 }, { "epoch": 0.023442449841605068, "grad_norm": 3.279864549636841, "learning_rate": 1.0918032786885245e-05, "loss": 0.9709, "step": 333 }, { "epoch": 0.023512847588877155, "grad_norm": 3.615349531173706, "learning_rate": 1.0950819672131147e-05, "loss": 0.8734, "step": 334 }, { "epoch": 0.023583245336149243, "grad_norm": 4.019484996795654, "learning_rate": 1.0983606557377048e-05, "loss": 0.9027, "step": 335 }, { "epoch": 0.02365364308342133, "grad_norm": 4.335453987121582, "learning_rate": 1.1016393442622951e-05, "loss": 0.8611, "step": 336 }, { "epoch": 0.02372404083069342, "grad_norm": 4.048886299133301, "learning_rate": 1.1049180327868852e-05, "loss": 0.9317, "step": 337 }, { "epoch": 0.023794438577965506, "grad_norm": 3.2750508785247803, "learning_rate": 1.1081967213114753e-05, "loss": 0.9007, "step": 338 }, { "epoch": 0.02386483632523759, "grad_norm": 2.9000089168548584, "learning_rate": 1.1114754098360656e-05, "loss": 0.8333, "step": 339 }, { "epoch": 0.02393523407250968, "grad_norm": 7.008626937866211, "learning_rate": 1.1147540983606557e-05, "loss": 0.8901, "step": 340 }, { "epoch": 0.024005631819781766, "grad_norm": 4.288931369781494, "learning_rate": 1.118032786885246e-05, "loss": 0.8171, "step": 341 }, { "epoch": 0.024076029567053854, "grad_norm": 2.9231760501861572, "learning_rate": 1.121311475409836e-05, "loss": 0.807, "step": 342 }, { "epoch": 0.024146427314325942, "grad_norm": 4.322519302368164, "learning_rate": 1.1245901639344263e-05, "loss": 0.9253, "step": 343 }, { "epoch": 0.02421682506159803, "grad_norm": 5.361907005310059, "learning_rate": 1.1278688524590164e-05, "loss": 0.9289, "step": 344 }, { "epoch": 0.024287222808870117, "grad_norm": 5.50483512878418, "learning_rate": 1.1311475409836065e-05, "loss": 1.0301, "step": 345 }, { "epoch": 0.024357620556142205, "grad_norm": 3.138530969619751, "learning_rate": 1.1344262295081967e-05, "loss": 0.9246, "step": 346 }, { "epoch": 0.02442801830341429, "grad_norm": 3.2169291973114014, "learning_rate": 1.1377049180327868e-05, "loss": 0.8365, "step": 347 }, { "epoch": 0.024498416050686377, "grad_norm": 3.394390344619751, "learning_rate": 1.1409836065573771e-05, "loss": 0.9969, "step": 348 }, { "epoch": 0.024568813797958465, "grad_norm": 4.191263675689697, "learning_rate": 1.1442622950819672e-05, "loss": 0.9901, "step": 349 }, { "epoch": 0.024639211545230553, "grad_norm": 3.2689449787139893, "learning_rate": 1.1475409836065575e-05, "loss": 0.8752, "step": 350 }, { "epoch": 0.02470960929250264, "grad_norm": 3.0763604640960693, "learning_rate": 1.1508196721311476e-05, "loss": 0.8122, "step": 351 }, { "epoch": 0.02478000703977473, "grad_norm": 3.4816110134124756, "learning_rate": 1.1540983606557377e-05, "loss": 0.9626, "step": 352 }, { "epoch": 0.024850404787046816, "grad_norm": 3.3246467113494873, "learning_rate": 1.157377049180328e-05, "loss": 0.9586, "step": 353 }, { "epoch": 0.0249208025343189, "grad_norm": 4.295650482177734, "learning_rate": 1.160655737704918e-05, "loss": 0.9201, "step": 354 }, { "epoch": 0.024991200281590988, "grad_norm": 3.224130153656006, "learning_rate": 1.1639344262295083e-05, "loss": 0.8959, "step": 355 }, { "epoch": 0.025061598028863076, "grad_norm": 2.9787838459014893, "learning_rate": 1.1672131147540984e-05, "loss": 0.7972, "step": 356 }, { "epoch": 0.025131995776135164, "grad_norm": 4.881450176239014, "learning_rate": 1.1704918032786885e-05, "loss": 0.9564, "step": 357 }, { "epoch": 0.02520239352340725, "grad_norm": 3.2482476234436035, "learning_rate": 1.1737704918032788e-05, "loss": 0.8253, "step": 358 }, { "epoch": 0.02527279127067934, "grad_norm": 3.149916172027588, "learning_rate": 1.1770491803278688e-05, "loss": 0.9232, "step": 359 }, { "epoch": 0.025343189017951427, "grad_norm": 2.9563393592834473, "learning_rate": 1.1803278688524591e-05, "loss": 0.8205, "step": 360 }, { "epoch": 0.02541358676522351, "grad_norm": 3.6794357299804688, "learning_rate": 1.1836065573770492e-05, "loss": 1.0042, "step": 361 }, { "epoch": 0.0254839845124956, "grad_norm": 4.08628511428833, "learning_rate": 1.1868852459016395e-05, "loss": 0.8463, "step": 362 }, { "epoch": 0.025554382259767687, "grad_norm": 3.9274065494537354, "learning_rate": 1.1901639344262294e-05, "loss": 0.8509, "step": 363 }, { "epoch": 0.025624780007039775, "grad_norm": 3.9656147956848145, "learning_rate": 1.1934426229508195e-05, "loss": 0.8812, "step": 364 }, { "epoch": 0.025695177754311863, "grad_norm": 2.792161226272583, "learning_rate": 1.1967213114754098e-05, "loss": 0.924, "step": 365 }, { "epoch": 0.02576557550158395, "grad_norm": 3.3945376873016357, "learning_rate": 1.1999999999999999e-05, "loss": 0.8569, "step": 366 }, { "epoch": 0.025835973248856038, "grad_norm": 2.9763596057891846, "learning_rate": 1.2032786885245901e-05, "loss": 0.8601, "step": 367 }, { "epoch": 0.025906370996128122, "grad_norm": 3.2409420013427734, "learning_rate": 1.2065573770491802e-05, "loss": 0.7626, "step": 368 }, { "epoch": 0.02597676874340021, "grad_norm": 3.584272623062134, "learning_rate": 1.2098360655737705e-05, "loss": 0.8841, "step": 369 }, { "epoch": 0.026047166490672298, "grad_norm": 8.443313598632812, "learning_rate": 1.2131147540983606e-05, "loss": 0.8161, "step": 370 }, { "epoch": 0.026117564237944386, "grad_norm": 4.192190170288086, "learning_rate": 1.2163934426229507e-05, "loss": 0.9495, "step": 371 }, { "epoch": 0.026187961985216474, "grad_norm": 3.1835973262786865, "learning_rate": 1.219672131147541e-05, "loss": 0.8923, "step": 372 }, { "epoch": 0.02625835973248856, "grad_norm": 8.168956756591797, "learning_rate": 1.222950819672131e-05, "loss": 0.8354, "step": 373 }, { "epoch": 0.02632875747976065, "grad_norm": 3.3233137130737305, "learning_rate": 1.2262295081967213e-05, "loss": 0.8309, "step": 374 }, { "epoch": 0.026399155227032733, "grad_norm": 4.14939546585083, "learning_rate": 1.2295081967213114e-05, "loss": 1.0437, "step": 375 }, { "epoch": 0.02646955297430482, "grad_norm": 3.9036550521850586, "learning_rate": 1.2327868852459017e-05, "loss": 0.8834, "step": 376 }, { "epoch": 0.02653995072157691, "grad_norm": 2.8705637454986572, "learning_rate": 1.2360655737704918e-05, "loss": 0.9176, "step": 377 }, { "epoch": 0.026610348468848997, "grad_norm": 3.9401586055755615, "learning_rate": 1.2393442622950819e-05, "loss": 0.9489, "step": 378 }, { "epoch": 0.026680746216121085, "grad_norm": 4.097420692443848, "learning_rate": 1.2426229508196721e-05, "loss": 0.8844, "step": 379 }, { "epoch": 0.026751143963393172, "grad_norm": 2.4673445224761963, "learning_rate": 1.2459016393442622e-05, "loss": 1.0373, "step": 380 }, { "epoch": 0.02682154171066526, "grad_norm": 3.5751023292541504, "learning_rate": 1.2491803278688525e-05, "loss": 1.0651, "step": 381 }, { "epoch": 0.026891939457937344, "grad_norm": 3.6647045612335205, "learning_rate": 1.2524590163934426e-05, "loss": 0.9208, "step": 382 }, { "epoch": 0.026962337205209432, "grad_norm": 3.4178709983825684, "learning_rate": 1.2557377049180327e-05, "loss": 0.7167, "step": 383 }, { "epoch": 0.02703273495248152, "grad_norm": 3.1912715435028076, "learning_rate": 1.259016393442623e-05, "loss": 0.8717, "step": 384 }, { "epoch": 0.027103132699753608, "grad_norm": 4.120757102966309, "learning_rate": 1.262295081967213e-05, "loss": 0.8786, "step": 385 }, { "epoch": 0.027173530447025696, "grad_norm": 3.8534224033355713, "learning_rate": 1.2655737704918033e-05, "loss": 0.8859, "step": 386 }, { "epoch": 0.027243928194297783, "grad_norm": 4.2396674156188965, "learning_rate": 1.2688524590163934e-05, "loss": 0.8825, "step": 387 }, { "epoch": 0.02731432594156987, "grad_norm": 6.677631378173828, "learning_rate": 1.2721311475409837e-05, "loss": 0.9759, "step": 388 }, { "epoch": 0.027384723688841955, "grad_norm": 3.362023115158081, "learning_rate": 1.2754098360655738e-05, "loss": 0.8176, "step": 389 }, { "epoch": 0.027455121436114043, "grad_norm": 3.2810916900634766, "learning_rate": 1.2786885245901639e-05, "loss": 0.7615, "step": 390 }, { "epoch": 0.02752551918338613, "grad_norm": 4.447373867034912, "learning_rate": 1.2819672131147541e-05, "loss": 0.9375, "step": 391 }, { "epoch": 0.02759591693065822, "grad_norm": 3.0620594024658203, "learning_rate": 1.2852459016393442e-05, "loss": 0.8822, "step": 392 }, { "epoch": 0.027666314677930307, "grad_norm": 3.624209403991699, "learning_rate": 1.2885245901639345e-05, "loss": 0.831, "step": 393 }, { "epoch": 0.027736712425202394, "grad_norm": 4.090682506561279, "learning_rate": 1.2918032786885246e-05, "loss": 0.8782, "step": 394 }, { "epoch": 0.027807110172474482, "grad_norm": 3.303088903427124, "learning_rate": 1.2950819672131147e-05, "loss": 0.9747, "step": 395 }, { "epoch": 0.027877507919746566, "grad_norm": 3.6377437114715576, "learning_rate": 1.298360655737705e-05, "loss": 0.8018, "step": 396 }, { "epoch": 0.027947905667018654, "grad_norm": 3.304927349090576, "learning_rate": 1.301639344262295e-05, "loss": 0.7309, "step": 397 }, { "epoch": 0.028018303414290742, "grad_norm": 3.411590337753296, "learning_rate": 1.3049180327868853e-05, "loss": 0.8462, "step": 398 }, { "epoch": 0.02808870116156283, "grad_norm": 2.687112331390381, "learning_rate": 1.3081967213114754e-05, "loss": 0.8583, "step": 399 }, { "epoch": 0.028159098908834918, "grad_norm": 3.0755984783172607, "learning_rate": 1.3114754098360657e-05, "loss": 0.8903, "step": 400 }, { "epoch": 0.028229496656107005, "grad_norm": 4.091688632965088, "learning_rate": 1.3147540983606558e-05, "loss": 0.8205, "step": 401 }, { "epoch": 0.028299894403379093, "grad_norm": 3.68048095703125, "learning_rate": 1.3180327868852459e-05, "loss": 0.9744, "step": 402 }, { "epoch": 0.02837029215065118, "grad_norm": 3.4734582901000977, "learning_rate": 1.3213114754098361e-05, "loss": 0.7506, "step": 403 }, { "epoch": 0.028440689897923265, "grad_norm": 4.7491631507873535, "learning_rate": 1.3245901639344262e-05, "loss": 0.9155, "step": 404 }, { "epoch": 0.028511087645195353, "grad_norm": 2.956101417541504, "learning_rate": 1.3278688524590165e-05, "loss": 0.8446, "step": 405 }, { "epoch": 0.02858148539246744, "grad_norm": 4.001038074493408, "learning_rate": 1.3311475409836064e-05, "loss": 0.9084, "step": 406 }, { "epoch": 0.02865188313973953, "grad_norm": 2.7428414821624756, "learning_rate": 1.3344262295081967e-05, "loss": 0.8285, "step": 407 }, { "epoch": 0.028722280887011616, "grad_norm": 2.962327480316162, "learning_rate": 1.3377049180327868e-05, "loss": 0.7576, "step": 408 }, { "epoch": 0.028792678634283704, "grad_norm": 2.857193946838379, "learning_rate": 1.3409836065573769e-05, "loss": 0.8189, "step": 409 }, { "epoch": 0.028863076381555792, "grad_norm": 2.9832582473754883, "learning_rate": 1.3442622950819672e-05, "loss": 0.8578, "step": 410 }, { "epoch": 0.028933474128827876, "grad_norm": 3.54685115814209, "learning_rate": 1.3475409836065573e-05, "loss": 0.8826, "step": 411 }, { "epoch": 0.029003871876099964, "grad_norm": 3.376476287841797, "learning_rate": 1.3508196721311475e-05, "loss": 0.8494, "step": 412 }, { "epoch": 0.02907426962337205, "grad_norm": 2.993804931640625, "learning_rate": 1.3540983606557376e-05, "loss": 0.8651, "step": 413 }, { "epoch": 0.02914466737064414, "grad_norm": 3.983947277069092, "learning_rate": 1.3573770491803279e-05, "loss": 0.786, "step": 414 }, { "epoch": 0.029215065117916227, "grad_norm": 3.155109167098999, "learning_rate": 1.360655737704918e-05, "loss": 0.8423, "step": 415 }, { "epoch": 0.029285462865188315, "grad_norm": 2.866457223892212, "learning_rate": 1.363934426229508e-05, "loss": 0.8535, "step": 416 }, { "epoch": 0.029355860612460403, "grad_norm": 3.9705259799957275, "learning_rate": 1.3672131147540983e-05, "loss": 0.8879, "step": 417 }, { "epoch": 0.029426258359732487, "grad_norm": 2.7610883712768555, "learning_rate": 1.3704918032786884e-05, "loss": 0.8251, "step": 418 }, { "epoch": 0.029496656107004575, "grad_norm": 4.710381984710693, "learning_rate": 1.3737704918032787e-05, "loss": 0.8721, "step": 419 }, { "epoch": 0.029567053854276663, "grad_norm": 3.6030678749084473, "learning_rate": 1.3770491803278688e-05, "loss": 0.8505, "step": 420 }, { "epoch": 0.02963745160154875, "grad_norm": 4.483442783355713, "learning_rate": 1.3803278688524589e-05, "loss": 0.974, "step": 421 }, { "epoch": 0.029707849348820838, "grad_norm": 2.9787371158599854, "learning_rate": 1.3836065573770492e-05, "loss": 0.8912, "step": 422 }, { "epoch": 0.029778247096092926, "grad_norm": 10.03062629699707, "learning_rate": 1.3868852459016393e-05, "loss": 0.8466, "step": 423 }, { "epoch": 0.029848644843365014, "grad_norm": 3.2609753608703613, "learning_rate": 1.3901639344262295e-05, "loss": 1.019, "step": 424 }, { "epoch": 0.029919042590637098, "grad_norm": 3.382953405380249, "learning_rate": 1.3934426229508196e-05, "loss": 0.7309, "step": 425 }, { "epoch": 0.029989440337909186, "grad_norm": 3.458962917327881, "learning_rate": 1.3967213114754099e-05, "loss": 0.7334, "step": 426 }, { "epoch": 0.030059838085181274, "grad_norm": 2.909278392791748, "learning_rate": 1.4e-05, "loss": 0.8875, "step": 427 }, { "epoch": 0.03013023583245336, "grad_norm": 2.987887144088745, "learning_rate": 1.3999999818031829e-05, "loss": 0.8224, "step": 428 }, { "epoch": 0.03020063357972545, "grad_norm": 4.058437824249268, "learning_rate": 1.399999927212733e-05, "loss": 0.8063, "step": 429 }, { "epoch": 0.030271031326997537, "grad_norm": 3.4275295734405518, "learning_rate": 1.399999836228653e-05, "loss": 0.7084, "step": 430 }, { "epoch": 0.030341429074269625, "grad_norm": 2.714033842086792, "learning_rate": 1.3999997088509473e-05, "loss": 0.9475, "step": 431 }, { "epoch": 0.03041182682154171, "grad_norm": 3.0211703777313232, "learning_rate": 1.3999995450796226e-05, "loss": 0.9334, "step": 432 }, { "epoch": 0.030482224568813797, "grad_norm": 3.760977268218994, "learning_rate": 1.399999344914688e-05, "loss": 0.9017, "step": 433 }, { "epoch": 0.030552622316085885, "grad_norm": 3.030567169189453, "learning_rate": 1.3999991083561535e-05, "loss": 0.8015, "step": 434 }, { "epoch": 0.030623020063357972, "grad_norm": 2.8472259044647217, "learning_rate": 1.399998835404031e-05, "loss": 0.9097, "step": 435 }, { "epoch": 0.03069341781063006, "grad_norm": 3.631469249725342, "learning_rate": 1.3999985260583356e-05, "loss": 0.9332, "step": 436 }, { "epoch": 0.030763815557902148, "grad_norm": 3.273707151412964, "learning_rate": 1.3999981803190826e-05, "loss": 0.8974, "step": 437 }, { "epoch": 0.030834213305174236, "grad_norm": 3.247506618499756, "learning_rate": 1.3999977981862903e-05, "loss": 0.8162, "step": 438 }, { "epoch": 0.03090461105244632, "grad_norm": 2.9269092082977295, "learning_rate": 1.3999973796599787e-05, "loss": 0.7554, "step": 439 }, { "epoch": 0.030975008799718408, "grad_norm": 4.147475719451904, "learning_rate": 1.399996924740169e-05, "loss": 0.7695, "step": 440 }, { "epoch": 0.031045406546990496, "grad_norm": 2.865320920944214, "learning_rate": 1.3999964334268854e-05, "loss": 0.7519, "step": 441 }, { "epoch": 0.031115804294262583, "grad_norm": 3.071187973022461, "learning_rate": 1.3999959057201534e-05, "loss": 0.8914, "step": 442 }, { "epoch": 0.03118620204153467, "grad_norm": 3.1855711936950684, "learning_rate": 1.39999534162e-05, "loss": 0.836, "step": 443 }, { "epoch": 0.03125659978880676, "grad_norm": 3.908602714538574, "learning_rate": 1.3999947411264552e-05, "loss": 0.7964, "step": 444 }, { "epoch": 0.03132699753607884, "grad_norm": 2.958383798599243, "learning_rate": 1.3999941042395497e-05, "loss": 0.7704, "step": 445 }, { "epoch": 0.031397395283350935, "grad_norm": 3.568150043487549, "learning_rate": 1.3999934309593166e-05, "loss": 0.852, "step": 446 }, { "epoch": 0.03146779303062302, "grad_norm": 3.233139991760254, "learning_rate": 1.3999927212857913e-05, "loss": 0.6286, "step": 447 }, { "epoch": 0.03153819077789511, "grad_norm": 3.2236971855163574, "learning_rate": 1.3999919752190102e-05, "loss": 0.8857, "step": 448 }, { "epoch": 0.031608588525167194, "grad_norm": 3.905930995941162, "learning_rate": 1.3999911927590125e-05, "loss": 0.8415, "step": 449 }, { "epoch": 0.03167898627243928, "grad_norm": 2.910520076751709, "learning_rate": 1.3999903739058389e-05, "loss": 0.9067, "step": 450 }, { "epoch": 0.03174938401971137, "grad_norm": 2.934964895248413, "learning_rate": 1.3999895186595318e-05, "loss": 0.7156, "step": 451 }, { "epoch": 0.031819781766983454, "grad_norm": 2.808274030685425, "learning_rate": 1.3999886270201355e-05, "loss": 0.8309, "step": 452 }, { "epoch": 0.031890179514255546, "grad_norm": 4.40629243850708, "learning_rate": 1.3999876989876965e-05, "loss": 0.6986, "step": 453 }, { "epoch": 0.03196057726152763, "grad_norm": 7.634444713592529, "learning_rate": 1.3999867345622634e-05, "loss": 0.9843, "step": 454 }, { "epoch": 0.03203097500879972, "grad_norm": 2.8840482234954834, "learning_rate": 1.3999857337438856e-05, "loss": 0.8994, "step": 455 }, { "epoch": 0.032101372756071805, "grad_norm": 3.249030113220215, "learning_rate": 1.3999846965326159e-05, "loss": 0.8732, "step": 456 }, { "epoch": 0.03217177050334389, "grad_norm": 2.768885374069214, "learning_rate": 1.3999836229285078e-05, "loss": 0.7642, "step": 457 }, { "epoch": 0.03224216825061598, "grad_norm": 2.7615644931793213, "learning_rate": 1.399982512931617e-05, "loss": 0.8237, "step": 458 }, { "epoch": 0.032312565997888065, "grad_norm": 7.379369258880615, "learning_rate": 1.3999813665420017e-05, "loss": 0.9239, "step": 459 }, { "epoch": 0.032382963745160157, "grad_norm": 2.8263051509857178, "learning_rate": 1.399980183759721e-05, "loss": 0.7436, "step": 460 }, { "epoch": 0.03245336149243224, "grad_norm": 3.3926925659179688, "learning_rate": 1.3999789645848368e-05, "loss": 0.8874, "step": 461 }, { "epoch": 0.03252375923970433, "grad_norm": 4.752262115478516, "learning_rate": 1.3999777090174121e-05, "loss": 0.8465, "step": 462 }, { "epoch": 0.032594156986976416, "grad_norm": 2.965500593185425, "learning_rate": 1.3999764170575125e-05, "loss": 0.894, "step": 463 }, { "epoch": 0.0326645547342485, "grad_norm": 3.2397937774658203, "learning_rate": 1.399975088705205e-05, "loss": 0.8432, "step": 464 }, { "epoch": 0.03273495248152059, "grad_norm": 3.501350164413452, "learning_rate": 1.3999737239605587e-05, "loss": 0.8014, "step": 465 }, { "epoch": 0.032805350228792676, "grad_norm": 2.5790843963623047, "learning_rate": 1.3999723228236445e-05, "loss": 0.7864, "step": 466 }, { "epoch": 0.03287574797606477, "grad_norm": 3.581630229949951, "learning_rate": 1.3999708852945353e-05, "loss": 0.7955, "step": 467 }, { "epoch": 0.03294614572333685, "grad_norm": 3.182894229888916, "learning_rate": 1.399969411373306e-05, "loss": 0.8419, "step": 468 }, { "epoch": 0.03301654347060894, "grad_norm": 2.7621166706085205, "learning_rate": 1.399967901060033e-05, "loss": 0.8543, "step": 469 }, { "epoch": 0.03308694121788103, "grad_norm": 3.957764148712158, "learning_rate": 1.3999663543547949e-05, "loss": 0.8113, "step": 470 }, { "epoch": 0.03315733896515311, "grad_norm": 3.3380677700042725, "learning_rate": 1.399964771257672e-05, "loss": 0.8188, "step": 471 }, { "epoch": 0.0332277367124252, "grad_norm": 3.0422704219818115, "learning_rate": 1.3999631517687468e-05, "loss": 0.9065, "step": 472 }, { "epoch": 0.03329813445969729, "grad_norm": 3.3505020141601562, "learning_rate": 1.3999614958881036e-05, "loss": 1.0004, "step": 473 }, { "epoch": 0.03336853220696938, "grad_norm": 3.649298906326294, "learning_rate": 1.399959803615828e-05, "loss": 0.7714, "step": 474 }, { "epoch": 0.03343892995424146, "grad_norm": 3.2996115684509277, "learning_rate": 1.3999580749520084e-05, "loss": 0.8265, "step": 475 }, { "epoch": 0.033509327701513554, "grad_norm": 2.788752555847168, "learning_rate": 1.3999563098967344e-05, "loss": 0.8221, "step": 476 }, { "epoch": 0.03357972544878564, "grad_norm": 3.599248170852661, "learning_rate": 1.3999545084500983e-05, "loss": 0.8316, "step": 477 }, { "epoch": 0.03365012319605773, "grad_norm": 2.5447847843170166, "learning_rate": 1.3999526706121932e-05, "loss": 0.7904, "step": 478 }, { "epoch": 0.033720520943329814, "grad_norm": 2.77638578414917, "learning_rate": 1.3999507963831148e-05, "loss": 0.9359, "step": 479 }, { "epoch": 0.0337909186906019, "grad_norm": 2.4172489643096924, "learning_rate": 1.3999488857629606e-05, "loss": 0.8059, "step": 480 }, { "epoch": 0.03386131643787399, "grad_norm": 7.59112024307251, "learning_rate": 1.3999469387518299e-05, "loss": 0.788, "step": 481 }, { "epoch": 0.033931714185146074, "grad_norm": 3.0883567333221436, "learning_rate": 1.3999449553498239e-05, "loss": 0.8457, "step": 482 }, { "epoch": 0.034002111932418165, "grad_norm": 3.146864414215088, "learning_rate": 1.399942935557046e-05, "loss": 0.745, "step": 483 }, { "epoch": 0.03407250967969025, "grad_norm": 6.104170799255371, "learning_rate": 1.3999408793736007e-05, "loss": 0.8233, "step": 484 }, { "epoch": 0.03414290742696234, "grad_norm": 3.1644911766052246, "learning_rate": 1.3999387867995954e-05, "loss": 0.9187, "step": 485 }, { "epoch": 0.034213305174234425, "grad_norm": 3.0731852054595947, "learning_rate": 1.3999366578351386e-05, "loss": 0.8721, "step": 486 }, { "epoch": 0.03428370292150651, "grad_norm": 3.164170742034912, "learning_rate": 1.399934492480341e-05, "loss": 0.8133, "step": 487 }, { "epoch": 0.0343541006687786, "grad_norm": 3.7331066131591797, "learning_rate": 1.3999322907353152e-05, "loss": 0.8101, "step": 488 }, { "epoch": 0.034424498416050685, "grad_norm": 3.648189067840576, "learning_rate": 1.3999300526001758e-05, "loss": 0.9061, "step": 489 }, { "epoch": 0.034494896163322776, "grad_norm": 3.298630714416504, "learning_rate": 1.399927778075039e-05, "loss": 0.824, "step": 490 }, { "epoch": 0.03456529391059486, "grad_norm": 2.7077085971832275, "learning_rate": 1.399925467160023e-05, "loss": 0.7211, "step": 491 }, { "epoch": 0.03463569165786695, "grad_norm": 3.1308085918426514, "learning_rate": 1.3999231198552484e-05, "loss": 0.8887, "step": 492 }, { "epoch": 0.034706089405139036, "grad_norm": 3.9145584106445312, "learning_rate": 1.399920736160837e-05, "loss": 0.8735, "step": 493 }, { "epoch": 0.03477648715241112, "grad_norm": 3.065737009048462, "learning_rate": 1.3999183160769123e-05, "loss": 0.842, "step": 494 }, { "epoch": 0.03484688489968321, "grad_norm": 2.5276222229003906, "learning_rate": 1.3999158596036004e-05, "loss": 0.9675, "step": 495 }, { "epoch": 0.034917282646955296, "grad_norm": 3.0536723136901855, "learning_rate": 1.3999133667410293e-05, "loss": 0.7658, "step": 496 }, { "epoch": 0.03498768039422739, "grad_norm": 3.9542236328125, "learning_rate": 1.3999108374893284e-05, "loss": 0.7336, "step": 497 }, { "epoch": 0.03505807814149947, "grad_norm": 6.914444923400879, "learning_rate": 1.399908271848629e-05, "loss": 0.7613, "step": 498 }, { "epoch": 0.03512847588877156, "grad_norm": 3.5489625930786133, "learning_rate": 1.3999056698190648e-05, "loss": 0.8326, "step": 499 }, { "epoch": 0.03519887363604365, "grad_norm": 3.487492084503174, "learning_rate": 1.399903031400771e-05, "loss": 0.8483, "step": 500 }, { "epoch": 0.03526927138331573, "grad_norm": 3.68188738822937, "learning_rate": 1.3999003565938846e-05, "loss": 0.8374, "step": 501 }, { "epoch": 0.03533966913058782, "grad_norm": 3.024183750152588, "learning_rate": 1.3998976453985448e-05, "loss": 0.917, "step": 502 }, { "epoch": 0.03541006687785991, "grad_norm": 3.220881462097168, "learning_rate": 1.3998948978148927e-05, "loss": 0.9516, "step": 503 }, { "epoch": 0.035480464625132, "grad_norm": 3.5055460929870605, "learning_rate": 1.3998921138430708e-05, "loss": 0.8359, "step": 504 }, { "epoch": 0.03555086237240408, "grad_norm": 3.837888717651367, "learning_rate": 1.3998892934832241e-05, "loss": 0.8461, "step": 505 }, { "epoch": 0.035621260119676174, "grad_norm": 3.272800922393799, "learning_rate": 1.3998864367354991e-05, "loss": 0.8408, "step": 506 }, { "epoch": 0.03569165786694826, "grad_norm": 5.356675624847412, "learning_rate": 1.3998835436000448e-05, "loss": 0.8245, "step": 507 }, { "epoch": 0.03576205561422034, "grad_norm": 2.628054618835449, "learning_rate": 1.3998806140770108e-05, "loss": 0.887, "step": 508 }, { "epoch": 0.03583245336149243, "grad_norm": 3.076587677001953, "learning_rate": 1.39987764816655e-05, "loss": 0.8769, "step": 509 }, { "epoch": 0.03590285110876452, "grad_norm": 3.016392707824707, "learning_rate": 1.3998746458688163e-05, "loss": 0.9284, "step": 510 }, { "epoch": 0.03597324885603661, "grad_norm": 2.8268473148345947, "learning_rate": 1.3998716071839662e-05, "loss": 0.8744, "step": 511 }, { "epoch": 0.03604364660330869, "grad_norm": 2.404001235961914, "learning_rate": 1.399868532112157e-05, "loss": 0.9969, "step": 512 }, { "epoch": 0.036114044350580785, "grad_norm": 3.0389187335968018, "learning_rate": 1.3998654206535492e-05, "loss": 0.9624, "step": 513 }, { "epoch": 0.03618444209785287, "grad_norm": 3.059232473373413, "learning_rate": 1.3998622728083044e-05, "loss": 0.8563, "step": 514 }, { "epoch": 0.03625483984512495, "grad_norm": 2.895965099334717, "learning_rate": 1.3998590885765861e-05, "loss": 0.774, "step": 515 }, { "epoch": 0.036325237592397044, "grad_norm": 3.0412559509277344, "learning_rate": 1.39985586795856e-05, "loss": 0.765, "step": 516 }, { "epoch": 0.03639563533966913, "grad_norm": 3.577714681625366, "learning_rate": 1.3998526109543935e-05, "loss": 0.7684, "step": 517 }, { "epoch": 0.03646603308694122, "grad_norm": 3.3580434322357178, "learning_rate": 1.399849317564256e-05, "loss": 0.8338, "step": 518 }, { "epoch": 0.036536430834213304, "grad_norm": 3.4461493492126465, "learning_rate": 1.3998459877883186e-05, "loss": 0.7974, "step": 519 }, { "epoch": 0.036606828581485396, "grad_norm": 2.9838409423828125, "learning_rate": 1.3998426216267543e-05, "loss": 0.7476, "step": 520 }, { "epoch": 0.03667722632875748, "grad_norm": 3.0507543087005615, "learning_rate": 1.3998392190797386e-05, "loss": 0.9019, "step": 521 }, { "epoch": 0.036747624076029564, "grad_norm": 3.39325213432312, "learning_rate": 1.3998357801474478e-05, "loss": 0.8099, "step": 522 }, { "epoch": 0.036818021823301655, "grad_norm": 3.2275614738464355, "learning_rate": 1.3998323048300611e-05, "loss": 0.833, "step": 523 }, { "epoch": 0.03688841957057374, "grad_norm": 2.5024795532226562, "learning_rate": 1.3998287931277591e-05, "loss": 0.9081, "step": 524 }, { "epoch": 0.03695881731784583, "grad_norm": 2.9611589908599854, "learning_rate": 1.3998252450407244e-05, "loss": 0.8463, "step": 525 }, { "epoch": 0.037029215065117915, "grad_norm": 3.1987977027893066, "learning_rate": 1.3998216605691412e-05, "loss": 0.7821, "step": 526 }, { "epoch": 0.037099612812390007, "grad_norm": 3.1313233375549316, "learning_rate": 1.3998180397131961e-05, "loss": 0.8094, "step": 527 }, { "epoch": 0.03717001055966209, "grad_norm": 2.824742317199707, "learning_rate": 1.3998143824730776e-05, "loss": 0.8074, "step": 528 }, { "epoch": 0.037240408306934175, "grad_norm": 2.761240243911743, "learning_rate": 1.3998106888489754e-05, "loss": 0.8212, "step": 529 }, { "epoch": 0.037310806054206266, "grad_norm": 3.0550966262817383, "learning_rate": 1.3998069588410818e-05, "loss": 0.8611, "step": 530 }, { "epoch": 0.03738120380147835, "grad_norm": 2.5256669521331787, "learning_rate": 1.3998031924495905e-05, "loss": 0.7901, "step": 531 }, { "epoch": 0.03745160154875044, "grad_norm": 2.9315972328186035, "learning_rate": 1.3997993896746972e-05, "loss": 0.8549, "step": 532 }, { "epoch": 0.037521999296022526, "grad_norm": 2.3416874408721924, "learning_rate": 1.3997955505166002e-05, "loss": 0.7122, "step": 533 }, { "epoch": 0.03759239704329462, "grad_norm": 2.7199528217315674, "learning_rate": 1.3997916749754988e-05, "loss": 0.7902, "step": 534 }, { "epoch": 0.0376627947905667, "grad_norm": 3.4181931018829346, "learning_rate": 1.3997877630515943e-05, "loss": 0.8113, "step": 535 }, { "epoch": 0.037733192537838786, "grad_norm": 2.9263803958892822, "learning_rate": 1.3997838147450903e-05, "loss": 0.9299, "step": 536 }, { "epoch": 0.03780359028511088, "grad_norm": 2.4479856491088867, "learning_rate": 1.399779830056192e-05, "loss": 0.8647, "step": 537 }, { "epoch": 0.03787398803238296, "grad_norm": 3.3899643421173096, "learning_rate": 1.3997758089851065e-05, "loss": 0.793, "step": 538 }, { "epoch": 0.03794438577965505, "grad_norm": 2.6238455772399902, "learning_rate": 1.3997717515320428e-05, "loss": 0.8028, "step": 539 }, { "epoch": 0.03801478352692714, "grad_norm": 3.8376450538635254, "learning_rate": 1.3997676576972122e-05, "loss": 0.898, "step": 540 }, { "epoch": 0.03808518127419923, "grad_norm": 2.825827121734619, "learning_rate": 1.3997635274808273e-05, "loss": 0.9195, "step": 541 }, { "epoch": 0.03815557902147131, "grad_norm": 2.6318182945251465, "learning_rate": 1.3997593608831028e-05, "loss": 0.7487, "step": 542 }, { "epoch": 0.0382259767687434, "grad_norm": 2.5382261276245117, "learning_rate": 1.3997551579042555e-05, "loss": 0.7593, "step": 543 }, { "epoch": 0.03829637451601549, "grad_norm": 3.178757667541504, "learning_rate": 1.3997509185445037e-05, "loss": 0.958, "step": 544 }, { "epoch": 0.03836677226328757, "grad_norm": 2.946504831314087, "learning_rate": 1.3997466428040681e-05, "loss": 0.8427, "step": 545 }, { "epoch": 0.038437170010559664, "grad_norm": 3.6611249446868896, "learning_rate": 1.3997423306831707e-05, "loss": 0.8035, "step": 546 }, { "epoch": 0.03850756775783175, "grad_norm": 2.8863515853881836, "learning_rate": 1.3997379821820358e-05, "loss": 0.8334, "step": 547 }, { "epoch": 0.03857796550510384, "grad_norm": 3.2965848445892334, "learning_rate": 1.3997335973008894e-05, "loss": 0.843, "step": 548 }, { "epoch": 0.038648363252375924, "grad_norm": 2.537041425704956, "learning_rate": 1.3997291760399596e-05, "loss": 0.7059, "step": 549 }, { "epoch": 0.03871876099964801, "grad_norm": 2.905217170715332, "learning_rate": 1.3997247183994762e-05, "loss": 0.8397, "step": 550 }, { "epoch": 0.0387891587469201, "grad_norm": 3.3437349796295166, "learning_rate": 1.3997202243796712e-05, "loss": 0.8168, "step": 551 }, { "epoch": 0.038859556494192184, "grad_norm": 3.1850955486297607, "learning_rate": 1.3997156939807778e-05, "loss": 0.7065, "step": 552 }, { "epoch": 0.038929954241464275, "grad_norm": 4.86928653717041, "learning_rate": 1.3997111272030317e-05, "loss": 0.8004, "step": 553 }, { "epoch": 0.03900035198873636, "grad_norm": 2.6666688919067383, "learning_rate": 1.3997065240466707e-05, "loss": 0.7804, "step": 554 }, { "epoch": 0.03907074973600845, "grad_norm": 4.539239883422852, "learning_rate": 1.3997018845119336e-05, "loss": 0.741, "step": 555 }, { "epoch": 0.039141147483280535, "grad_norm": 3.263653516769409, "learning_rate": 1.399697208599062e-05, "loss": 0.8334, "step": 556 }, { "epoch": 0.03921154523055262, "grad_norm": 3.2612195014953613, "learning_rate": 1.3996924963082987e-05, "loss": 0.8353, "step": 557 }, { "epoch": 0.03928194297782471, "grad_norm": 2.8736414909362793, "learning_rate": 1.3996877476398889e-05, "loss": 0.8151, "step": 558 }, { "epoch": 0.039352340725096795, "grad_norm": 3.4466845989227295, "learning_rate": 1.3996829625940792e-05, "loss": 0.7471, "step": 559 }, { "epoch": 0.039422738472368886, "grad_norm": 2.3491859436035156, "learning_rate": 1.3996781411711188e-05, "loss": 0.8278, "step": 560 }, { "epoch": 0.03949313621964097, "grad_norm": 2.6061103343963623, "learning_rate": 1.3996732833712582e-05, "loss": 1.0888, "step": 561 }, { "epoch": 0.03956353396691306, "grad_norm": 3.4924848079681396, "learning_rate": 1.39966838919475e-05, "loss": 0.8635, "step": 562 }, { "epoch": 0.039633931714185146, "grad_norm": 3.603238582611084, "learning_rate": 1.3996634586418486e-05, "loss": 0.7736, "step": 563 }, { "epoch": 0.03970432946145723, "grad_norm": 3.2938106060028076, "learning_rate": 1.3996584917128102e-05, "loss": 0.7416, "step": 564 }, { "epoch": 0.03977472720872932, "grad_norm": 2.842083215713501, "learning_rate": 1.3996534884078931e-05, "loss": 0.6779, "step": 565 }, { "epoch": 0.039845124956001406, "grad_norm": 3.6062729358673096, "learning_rate": 1.3996484487273575e-05, "loss": 0.8641, "step": 566 }, { "epoch": 0.0399155227032735, "grad_norm": 2.9615612030029297, "learning_rate": 1.3996433726714657e-05, "loss": 0.8178, "step": 567 }, { "epoch": 0.03998592045054558, "grad_norm": 3.1762547492980957, "learning_rate": 1.399638260240481e-05, "loss": 0.8065, "step": 568 }, { "epoch": 0.04005631819781767, "grad_norm": 2.8781039714813232, "learning_rate": 1.3996331114346697e-05, "loss": 0.8916, "step": 569 }, { "epoch": 0.04012671594508976, "grad_norm": 3.3186848163604736, "learning_rate": 1.3996279262542994e-05, "loss": 0.7611, "step": 570 }, { "epoch": 0.04019711369236184, "grad_norm": 3.444287061691284, "learning_rate": 1.3996227046996394e-05, "loss": 0.6326, "step": 571 }, { "epoch": 0.04026751143963393, "grad_norm": 3.2134804725646973, "learning_rate": 1.3996174467709613e-05, "loss": 0.8539, "step": 572 }, { "epoch": 0.04033790918690602, "grad_norm": 2.9034829139709473, "learning_rate": 1.3996121524685388e-05, "loss": 0.7416, "step": 573 }, { "epoch": 0.04040830693417811, "grad_norm": 2.5816683769226074, "learning_rate": 1.3996068217926468e-05, "loss": 0.785, "step": 574 }, { "epoch": 0.04047870468145019, "grad_norm": 2.8911941051483154, "learning_rate": 1.3996014547435623e-05, "loss": 0.8308, "step": 575 }, { "epoch": 0.04054910242872228, "grad_norm": 3.1707026958465576, "learning_rate": 1.3995960513215648e-05, "loss": 0.8582, "step": 576 }, { "epoch": 0.04061950017599437, "grad_norm": 2.6814775466918945, "learning_rate": 1.399590611526935e-05, "loss": 0.775, "step": 577 }, { "epoch": 0.04068989792326645, "grad_norm": 3.1916022300720215, "learning_rate": 1.3995851353599557e-05, "loss": 0.8466, "step": 578 }, { "epoch": 0.04076029567053854, "grad_norm": 3.254413604736328, "learning_rate": 1.3995796228209116e-05, "loss": 0.8542, "step": 579 }, { "epoch": 0.04083069341781063, "grad_norm": 2.639108180999756, "learning_rate": 1.3995740739100894e-05, "loss": 0.8417, "step": 580 }, { "epoch": 0.04090109116508272, "grad_norm": 3.2948527336120605, "learning_rate": 1.3995684886277773e-05, "loss": 0.9285, "step": 581 }, { "epoch": 0.0409714889123548, "grad_norm": 4.132096767425537, "learning_rate": 1.399562866974266e-05, "loss": 0.8599, "step": 582 }, { "epoch": 0.041041886659626894, "grad_norm": 3.2728514671325684, "learning_rate": 1.3995572089498479e-05, "loss": 0.7564, "step": 583 }, { "epoch": 0.04111228440689898, "grad_norm": 2.8726067543029785, "learning_rate": 1.3995515145548168e-05, "loss": 0.8545, "step": 584 }, { "epoch": 0.04118268215417107, "grad_norm": 3.138972043991089, "learning_rate": 1.3995457837894689e-05, "loss": 0.7385, "step": 585 }, { "epoch": 0.041253079901443154, "grad_norm": 2.5554850101470947, "learning_rate": 1.3995400166541021e-05, "loss": 0.6948, "step": 586 }, { "epoch": 0.04132347764871524, "grad_norm": 2.983220100402832, "learning_rate": 1.3995342131490164e-05, "loss": 0.8357, "step": 587 }, { "epoch": 0.04139387539598733, "grad_norm": 3.447674512863159, "learning_rate": 1.3995283732745133e-05, "loss": 0.8964, "step": 588 }, { "epoch": 0.041464273143259414, "grad_norm": 2.6571645736694336, "learning_rate": 1.3995224970308967e-05, "loss": 0.7378, "step": 589 }, { "epoch": 0.041534670890531505, "grad_norm": 2.5773870944976807, "learning_rate": 1.3995165844184718e-05, "loss": 0.974, "step": 590 }, { "epoch": 0.04160506863780359, "grad_norm": 2.931352138519287, "learning_rate": 1.3995106354375463e-05, "loss": 0.8262, "step": 591 }, { "epoch": 0.04167546638507568, "grad_norm": 3.0011138916015625, "learning_rate": 1.3995046500884293e-05, "loss": 0.8139, "step": 592 }, { "epoch": 0.041745864132347765, "grad_norm": 4.662199974060059, "learning_rate": 1.399498628371432e-05, "loss": 0.8194, "step": 593 }, { "epoch": 0.04181626187961985, "grad_norm": 3.69881272315979, "learning_rate": 1.3994925702868674e-05, "loss": 0.8952, "step": 594 }, { "epoch": 0.04188665962689194, "grad_norm": 3.118802070617676, "learning_rate": 1.3994864758350507e-05, "loss": 0.8823, "step": 595 }, { "epoch": 0.041957057374164025, "grad_norm": 3.833556652069092, "learning_rate": 1.3994803450162988e-05, "loss": 0.9066, "step": 596 }, { "epoch": 0.042027455121436116, "grad_norm": 2.8065719604492188, "learning_rate": 1.39947417783093e-05, "loss": 0.8227, "step": 597 }, { "epoch": 0.0420978528687082, "grad_norm": 2.7200613021850586, "learning_rate": 1.3994679742792654e-05, "loss": 0.8565, "step": 598 }, { "epoch": 0.04216825061598029, "grad_norm": 2.7900142669677734, "learning_rate": 1.3994617343616272e-05, "loss": 0.7952, "step": 599 }, { "epoch": 0.042238648363252376, "grad_norm": 3.8182265758514404, "learning_rate": 1.3994554580783401e-05, "loss": 0.788, "step": 600 }, { "epoch": 0.04230904611052446, "grad_norm": 3.8539509773254395, "learning_rate": 1.3994491454297302e-05, "loss": 0.9168, "step": 601 }, { "epoch": 0.04237944385779655, "grad_norm": 2.8815250396728516, "learning_rate": 1.3994427964161258e-05, "loss": 0.6446, "step": 602 }, { "epoch": 0.042449841605068636, "grad_norm": 3.436572313308716, "learning_rate": 1.3994364110378567e-05, "loss": 0.92, "step": 603 }, { "epoch": 0.04252023935234073, "grad_norm": 2.9643971920013428, "learning_rate": 1.3994299892952555e-05, "loss": 0.8225, "step": 604 }, { "epoch": 0.04259063709961281, "grad_norm": 6.986907958984375, "learning_rate": 1.3994235311886555e-05, "loss": 0.8363, "step": 605 }, { "epoch": 0.0426610348468849, "grad_norm": 2.9424030780792236, "learning_rate": 1.3994170367183928e-05, "loss": 0.9139, "step": 606 }, { "epoch": 0.04273143259415699, "grad_norm": 4.638256549835205, "learning_rate": 1.3994105058848048e-05, "loss": 0.9338, "step": 607 }, { "epoch": 0.04280183034142907, "grad_norm": 2.68192195892334, "learning_rate": 1.3994039386882312e-05, "loss": 0.9014, "step": 608 }, { "epoch": 0.04287222808870116, "grad_norm": 3.1741292476654053, "learning_rate": 1.3993973351290136e-05, "loss": 0.8264, "step": 609 }, { "epoch": 0.04294262583597325, "grad_norm": 3.42448353767395, "learning_rate": 1.399390695207495e-05, "loss": 0.9273, "step": 610 }, { "epoch": 0.04301302358324534, "grad_norm": 5.154784202575684, "learning_rate": 1.3993840189240207e-05, "loss": 0.7445, "step": 611 }, { "epoch": 0.04308342133051742, "grad_norm": 3.2171409130096436, "learning_rate": 1.399377306278938e-05, "loss": 0.8539, "step": 612 }, { "epoch": 0.043153819077789514, "grad_norm": 3.340319871902466, "learning_rate": 1.3993705572725957e-05, "loss": 0.7391, "step": 613 }, { "epoch": 0.0432242168250616, "grad_norm": 2.962698221206665, "learning_rate": 1.3993637719053447e-05, "loss": 0.8524, "step": 614 }, { "epoch": 0.04329461457233368, "grad_norm": 2.737208604812622, "learning_rate": 1.3993569501775377e-05, "loss": 0.8582, "step": 615 }, { "epoch": 0.043365012319605774, "grad_norm": 3.2467129230499268, "learning_rate": 1.3993500920895297e-05, "loss": 0.863, "step": 616 }, { "epoch": 0.04343541006687786, "grad_norm": 2.6415817737579346, "learning_rate": 1.399343197641677e-05, "loss": 0.8093, "step": 617 }, { "epoch": 0.04350580781414995, "grad_norm": 2.807705879211426, "learning_rate": 1.399336266834338e-05, "loss": 0.7733, "step": 618 }, { "epoch": 0.043576205561422034, "grad_norm": 3.71528697013855, "learning_rate": 1.3993292996678733e-05, "loss": 0.8476, "step": 619 }, { "epoch": 0.043646603308694125, "grad_norm": 4.940652370452881, "learning_rate": 1.3993222961426446e-05, "loss": 0.9715, "step": 620 }, { "epoch": 0.04371700105596621, "grad_norm": 3.358454465866089, "learning_rate": 1.3993152562590168e-05, "loss": 0.8072, "step": 621 }, { "epoch": 0.043787398803238294, "grad_norm": 2.5002331733703613, "learning_rate": 1.3993081800173553e-05, "loss": 0.8363, "step": 622 }, { "epoch": 0.043857796550510385, "grad_norm": 3.0807623863220215, "learning_rate": 1.3993010674180281e-05, "loss": 0.8641, "step": 623 }, { "epoch": 0.04392819429778247, "grad_norm": 3.0479373931884766, "learning_rate": 1.3992939184614052e-05, "loss": 0.7353, "step": 624 }, { "epoch": 0.04399859204505456, "grad_norm": 2.978351593017578, "learning_rate": 1.3992867331478581e-05, "loss": 0.8863, "step": 625 }, { "epoch": 0.044068989792326645, "grad_norm": 2.8319127559661865, "learning_rate": 1.3992795114777605e-05, "loss": 0.8966, "step": 626 }, { "epoch": 0.044139387539598736, "grad_norm": 4.638989448547363, "learning_rate": 1.3992722534514879e-05, "loss": 0.9038, "step": 627 }, { "epoch": 0.04420978528687082, "grad_norm": 3.836480140686035, "learning_rate": 1.3992649590694173e-05, "loss": 0.8008, "step": 628 }, { "epoch": 0.044280183034142905, "grad_norm": 3.361671209335327, "learning_rate": 1.3992576283319283e-05, "loss": 0.9475, "step": 629 }, { "epoch": 0.044350580781414996, "grad_norm": 2.2469801902770996, "learning_rate": 1.399250261239402e-05, "loss": 0.8169, "step": 630 }, { "epoch": 0.04442097852868708, "grad_norm": 2.9453229904174805, "learning_rate": 1.3992428577922211e-05, "loss": 0.7529, "step": 631 }, { "epoch": 0.04449137627595917, "grad_norm": 2.7885899543762207, "learning_rate": 1.399235417990771e-05, "loss": 0.839, "step": 632 }, { "epoch": 0.044561774023231256, "grad_norm": 2.331615924835205, "learning_rate": 1.3992279418354381e-05, "loss": 0.7937, "step": 633 }, { "epoch": 0.04463217177050335, "grad_norm": 3.755197286605835, "learning_rate": 1.3992204293266114e-05, "loss": 0.885, "step": 634 }, { "epoch": 0.04470256951777543, "grad_norm": 2.7593140602111816, "learning_rate": 1.3992128804646811e-05, "loss": 0.7841, "step": 635 }, { "epoch": 0.044772967265047516, "grad_norm": 2.3185527324676514, "learning_rate": 1.3992052952500402e-05, "loss": 0.8011, "step": 636 }, { "epoch": 0.04484336501231961, "grad_norm": 3.883204221725464, "learning_rate": 1.3991976736830825e-05, "loss": 0.9378, "step": 637 }, { "epoch": 0.04491376275959169, "grad_norm": 2.983015537261963, "learning_rate": 1.3991900157642048e-05, "loss": 0.8947, "step": 638 }, { "epoch": 0.04498416050686378, "grad_norm": 3.223975658416748, "learning_rate": 1.3991823214938046e-05, "loss": 0.7582, "step": 639 }, { "epoch": 0.04505455825413587, "grad_norm": 3.5113606452941895, "learning_rate": 1.3991745908722824e-05, "loss": 0.7585, "step": 640 }, { "epoch": 0.04512495600140796, "grad_norm": 3.5058138370513916, "learning_rate": 1.3991668239000401e-05, "loss": 0.9325, "step": 641 }, { "epoch": 0.04519535374868004, "grad_norm": 2.8188700675964355, "learning_rate": 1.3991590205774816e-05, "loss": 0.8776, "step": 642 }, { "epoch": 0.045265751495952126, "grad_norm": 2.6184709072113037, "learning_rate": 1.399151180905012e-05, "loss": 0.8521, "step": 643 }, { "epoch": 0.04533614924322422, "grad_norm": 8.33962345123291, "learning_rate": 1.3991433048830394e-05, "loss": 0.7851, "step": 644 }, { "epoch": 0.0454065469904963, "grad_norm": 2.7237563133239746, "learning_rate": 1.3991353925119735e-05, "loss": 0.7464, "step": 645 }, { "epoch": 0.04547694473776839, "grad_norm": 2.90704083442688, "learning_rate": 1.3991274437922251e-05, "loss": 0.808, "step": 646 }, { "epoch": 0.04554734248504048, "grad_norm": 2.781816244125366, "learning_rate": 1.3991194587242079e-05, "loss": 0.8226, "step": 647 }, { "epoch": 0.04561774023231257, "grad_norm": 3.1352083683013916, "learning_rate": 1.3991114373083367e-05, "loss": 0.8892, "step": 648 }, { "epoch": 0.04568813797958465, "grad_norm": 2.9643239974975586, "learning_rate": 1.399103379545029e-05, "loss": 0.8932, "step": 649 }, { "epoch": 0.04575853572685674, "grad_norm": 3.1343560218811035, "learning_rate": 1.3990952854347032e-05, "loss": 0.6376, "step": 650 }, { "epoch": 0.04582893347412883, "grad_norm": 2.53393292427063, "learning_rate": 1.3990871549777804e-05, "loss": 0.8668, "step": 651 }, { "epoch": 0.04589933122140091, "grad_norm": 2.611707925796509, "learning_rate": 1.3990789881746834e-05, "loss": 0.7879, "step": 652 }, { "epoch": 0.045969728968673004, "grad_norm": 2.382315158843994, "learning_rate": 1.3990707850258365e-05, "loss": 0.9378, "step": 653 }, { "epoch": 0.04604012671594509, "grad_norm": 2.7706809043884277, "learning_rate": 1.3990625455316667e-05, "loss": 0.8717, "step": 654 }, { "epoch": 0.04611052446321718, "grad_norm": 3.0526013374328613, "learning_rate": 1.3990542696926018e-05, "loss": 0.8795, "step": 655 }, { "epoch": 0.046180922210489264, "grad_norm": 3.222999095916748, "learning_rate": 1.3990459575090723e-05, "loss": 0.8207, "step": 656 }, { "epoch": 0.04625131995776135, "grad_norm": 3.0456573963165283, "learning_rate": 1.3990376089815105e-05, "loss": 0.8873, "step": 657 }, { "epoch": 0.04632171770503344, "grad_norm": 2.870551824569702, "learning_rate": 1.3990292241103502e-05, "loss": 0.931, "step": 658 }, { "epoch": 0.046392115452305524, "grad_norm": 3.0117945671081543, "learning_rate": 1.3990208028960276e-05, "loss": 0.8932, "step": 659 }, { "epoch": 0.046462513199577615, "grad_norm": 3.116360664367676, "learning_rate": 1.39901234533898e-05, "loss": 0.9275, "step": 660 }, { "epoch": 0.0465329109468497, "grad_norm": 3.6685383319854736, "learning_rate": 1.399003851439648e-05, "loss": 0.8892, "step": 661 }, { "epoch": 0.04660330869412179, "grad_norm": 2.819368839263916, "learning_rate": 1.3989953211984725e-05, "loss": 0.9099, "step": 662 }, { "epoch": 0.046673706441393875, "grad_norm": 3.810616970062256, "learning_rate": 1.3989867546158971e-05, "loss": 0.7366, "step": 663 }, { "epoch": 0.04674410418866596, "grad_norm": 2.8623907566070557, "learning_rate": 1.3989781516923675e-05, "loss": 0.8477, "step": 664 }, { "epoch": 0.04681450193593805, "grad_norm": 2.969770669937134, "learning_rate": 1.3989695124283307e-05, "loss": 0.7092, "step": 665 }, { "epoch": 0.046884899683210135, "grad_norm": 2.634084939956665, "learning_rate": 1.3989608368242359e-05, "loss": 0.8062, "step": 666 }, { "epoch": 0.046955297430482226, "grad_norm": 2.4992165565490723, "learning_rate": 1.3989521248805342e-05, "loss": 0.9245, "step": 667 }, { "epoch": 0.04702569517775431, "grad_norm": 2.5653514862060547, "learning_rate": 1.3989433765976784e-05, "loss": 0.8038, "step": 668 }, { "epoch": 0.0470960929250264, "grad_norm": 2.589076042175293, "learning_rate": 1.3989345919761236e-05, "loss": 0.8655, "step": 669 }, { "epoch": 0.047166490672298486, "grad_norm": 2.902453899383545, "learning_rate": 1.3989257710163262e-05, "loss": 0.8627, "step": 670 }, { "epoch": 0.04723688841957057, "grad_norm": 2.7680416107177734, "learning_rate": 1.398916913718745e-05, "loss": 0.8249, "step": 671 }, { "epoch": 0.04730728616684266, "grad_norm": 3.3486061096191406, "learning_rate": 1.3989080200838407e-05, "loss": 0.8974, "step": 672 }, { "epoch": 0.047377683914114746, "grad_norm": 2.5028817653656006, "learning_rate": 1.3988990901120753e-05, "loss": 0.8272, "step": 673 }, { "epoch": 0.04744808166138684, "grad_norm": 5.2081217765808105, "learning_rate": 1.3988901238039133e-05, "loss": 0.9207, "step": 674 }, { "epoch": 0.04751847940865892, "grad_norm": 3.142010450363159, "learning_rate": 1.3988811211598208e-05, "loss": 0.8575, "step": 675 }, { "epoch": 0.04758887715593101, "grad_norm": 3.288538694381714, "learning_rate": 1.3988720821802658e-05, "loss": 0.8307, "step": 676 }, { "epoch": 0.0476592749032031, "grad_norm": 2.5837507247924805, "learning_rate": 1.3988630068657184e-05, "loss": 0.6858, "step": 677 }, { "epoch": 0.04772967265047518, "grad_norm": 2.890468120574951, "learning_rate": 1.3988538952166504e-05, "loss": 0.9703, "step": 678 }, { "epoch": 0.04780007039774727, "grad_norm": 2.7492434978485107, "learning_rate": 1.3988447472335353e-05, "loss": 0.9226, "step": 679 }, { "epoch": 0.04787046814501936, "grad_norm": 2.8699605464935303, "learning_rate": 1.3988355629168489e-05, "loss": 0.8332, "step": 680 }, { "epoch": 0.04794086589229145, "grad_norm": 2.451011896133423, "learning_rate": 1.3988263422670688e-05, "loss": 0.8303, "step": 681 }, { "epoch": 0.04801126363956353, "grad_norm": 2.8877081871032715, "learning_rate": 1.3988170852846743e-05, "loss": 0.9855, "step": 682 }, { "epoch": 0.048081661386835624, "grad_norm": 2.841970682144165, "learning_rate": 1.3988077919701465e-05, "loss": 0.8673, "step": 683 }, { "epoch": 0.04815205913410771, "grad_norm": 3.1847903728485107, "learning_rate": 1.3987984623239687e-05, "loss": 0.792, "step": 684 }, { "epoch": 0.04822245688137979, "grad_norm": 2.6723263263702393, "learning_rate": 1.3987890963466262e-05, "loss": 0.8908, "step": 685 }, { "epoch": 0.048292854628651884, "grad_norm": 3.3318393230438232, "learning_rate": 1.3987796940386055e-05, "loss": 0.7154, "step": 686 }, { "epoch": 0.04836325237592397, "grad_norm": 3.527179718017578, "learning_rate": 1.3987702554003958e-05, "loss": 0.9761, "step": 687 }, { "epoch": 0.04843365012319606, "grad_norm": 2.9489657878875732, "learning_rate": 1.3987607804324876e-05, "loss": 0.8084, "step": 688 }, { "epoch": 0.048504047870468144, "grad_norm": 3.057199478149414, "learning_rate": 1.3987512691353734e-05, "loss": 0.8009, "step": 689 }, { "epoch": 0.048574445617740235, "grad_norm": 2.946714401245117, "learning_rate": 1.3987417215095481e-05, "loss": 0.8358, "step": 690 }, { "epoch": 0.04864484336501232, "grad_norm": 2.3823678493499756, "learning_rate": 1.3987321375555081e-05, "loss": 0.9048, "step": 691 }, { "epoch": 0.04871524111228441, "grad_norm": 2.932769298553467, "learning_rate": 1.3987225172737512e-05, "loss": 0.7162, "step": 692 }, { "epoch": 0.048785638859556495, "grad_norm": 2.647247076034546, "learning_rate": 1.3987128606647777e-05, "loss": 0.8488, "step": 693 }, { "epoch": 0.04885603660682858, "grad_norm": 2.645198106765747, "learning_rate": 1.39870316772909e-05, "loss": 0.821, "step": 694 }, { "epoch": 0.04892643435410067, "grad_norm": 2.560230255126953, "learning_rate": 1.3986934384671917e-05, "loss": 0.8415, "step": 695 }, { "epoch": 0.048996832101372755, "grad_norm": 3.8510472774505615, "learning_rate": 1.3986836728795889e-05, "loss": 0.7878, "step": 696 }, { "epoch": 0.049067229848644846, "grad_norm": 3.201700448989868, "learning_rate": 1.3986738709667891e-05, "loss": 0.7937, "step": 697 }, { "epoch": 0.04913762759591693, "grad_norm": 2.4279398918151855, "learning_rate": 1.398664032729302e-05, "loss": 0.7355, "step": 698 }, { "epoch": 0.04920802534318902, "grad_norm": 2.3685619831085205, "learning_rate": 1.3986541581676391e-05, "loss": 0.898, "step": 699 }, { "epoch": 0.049278423090461106, "grad_norm": 2.8274645805358887, "learning_rate": 1.3986442472823138e-05, "loss": 0.8004, "step": 700 }, { "epoch": 0.04934882083773319, "grad_norm": 3.1318671703338623, "learning_rate": 1.3986343000738412e-05, "loss": 0.859, "step": 701 }, { "epoch": 0.04941921858500528, "grad_norm": 2.432746648788452, "learning_rate": 1.3986243165427389e-05, "loss": 0.814, "step": 702 }, { "epoch": 0.049489616332277366, "grad_norm": 3.2953317165374756, "learning_rate": 1.3986142966895255e-05, "loss": 0.9016, "step": 703 }, { "epoch": 0.04956001407954946, "grad_norm": 2.799640417098999, "learning_rate": 1.398604240514722e-05, "loss": 0.8283, "step": 704 }, { "epoch": 0.04963041182682154, "grad_norm": 2.985623359680176, "learning_rate": 1.3985941480188516e-05, "loss": 0.7764, "step": 705 }, { "epoch": 0.04970080957409363, "grad_norm": 2.5629806518554688, "learning_rate": 1.3985840192024385e-05, "loss": 0.793, "step": 706 }, { "epoch": 0.04977120732136572, "grad_norm": 2.641219139099121, "learning_rate": 1.3985738540660098e-05, "loss": 0.9018, "step": 707 }, { "epoch": 0.0498416050686378, "grad_norm": 2.911142587661743, "learning_rate": 1.3985636526100934e-05, "loss": 0.8143, "step": 708 }, { "epoch": 0.04991200281590989, "grad_norm": 2.908027410507202, "learning_rate": 1.3985534148352206e-05, "loss": 0.7829, "step": 709 }, { "epoch": 0.049982400563181976, "grad_norm": 3.7878053188323975, "learning_rate": 1.3985431407419227e-05, "loss": 0.934, "step": 710 }, { "epoch": 0.05005279831045407, "grad_norm": 2.39341402053833, "learning_rate": 1.3985328303307345e-05, "loss": 0.8724, "step": 711 }, { "epoch": 0.05012319605772615, "grad_norm": 3.2111599445343018, "learning_rate": 1.3985224836021916e-05, "loss": 0.83, "step": 712 }, { "epoch": 0.05019359380499824, "grad_norm": 2.432382106781006, "learning_rate": 1.3985121005568324e-05, "loss": 0.8139, "step": 713 }, { "epoch": 0.05026399155227033, "grad_norm": 3.898714065551758, "learning_rate": 1.3985016811951963e-05, "loss": 0.7808, "step": 714 }, { "epoch": 0.05033438929954241, "grad_norm": 2.8255977630615234, "learning_rate": 1.3984912255178252e-05, "loss": 0.7893, "step": 715 }, { "epoch": 0.0504047870468145, "grad_norm": 2.616126537322998, "learning_rate": 1.3984807335252628e-05, "loss": 0.728, "step": 716 }, { "epoch": 0.05047518479408659, "grad_norm": 2.8033738136291504, "learning_rate": 1.3984702052180544e-05, "loss": 0.8237, "step": 717 }, { "epoch": 0.05054558254135868, "grad_norm": 2.8952503204345703, "learning_rate": 1.3984596405967475e-05, "loss": 0.7724, "step": 718 }, { "epoch": 0.05061598028863076, "grad_norm": 2.75644588470459, "learning_rate": 1.3984490396618913e-05, "loss": 0.7862, "step": 719 }, { "epoch": 0.050686378035902854, "grad_norm": 2.79675555229187, "learning_rate": 1.3984384024140369e-05, "loss": 0.7688, "step": 720 }, { "epoch": 0.05075677578317494, "grad_norm": 2.7456278800964355, "learning_rate": 1.3984277288537374e-05, "loss": 0.6981, "step": 721 }, { "epoch": 0.05082717353044702, "grad_norm": 2.5601515769958496, "learning_rate": 1.3984170189815481e-05, "loss": 0.8164, "step": 722 }, { "epoch": 0.050897571277719114, "grad_norm": 3.6979687213897705, "learning_rate": 1.398406272798025e-05, "loss": 0.6856, "step": 723 }, { "epoch": 0.0509679690249912, "grad_norm": 2.6759891510009766, "learning_rate": 1.3983954903037274e-05, "loss": 0.871, "step": 724 }, { "epoch": 0.05103836677226329, "grad_norm": 2.295236110687256, "learning_rate": 1.3983846714992158e-05, "loss": 0.6784, "step": 725 }, { "epoch": 0.051108764519535374, "grad_norm": 4.108501434326172, "learning_rate": 1.3983738163850526e-05, "loss": 0.8216, "step": 726 }, { "epoch": 0.051179162266807465, "grad_norm": 3.1564605236053467, "learning_rate": 1.3983629249618025e-05, "loss": 0.7545, "step": 727 }, { "epoch": 0.05124956001407955, "grad_norm": 3.203169584274292, "learning_rate": 1.398351997230031e-05, "loss": 0.8203, "step": 728 }, { "epoch": 0.051319957761351634, "grad_norm": 3.2010984420776367, "learning_rate": 1.398341033190307e-05, "loss": 0.8009, "step": 729 }, { "epoch": 0.051390355508623725, "grad_norm": 3.5716562271118164, "learning_rate": 1.3983300328432003e-05, "loss": 0.7984, "step": 730 }, { "epoch": 0.05146075325589581, "grad_norm": 2.6651852130889893, "learning_rate": 1.3983189961892826e-05, "loss": 0.8903, "step": 731 }, { "epoch": 0.0515311510031679, "grad_norm": 3.229210376739502, "learning_rate": 1.3983079232291279e-05, "loss": 0.8723, "step": 732 }, { "epoch": 0.051601548750439985, "grad_norm": 3.8051159381866455, "learning_rate": 1.3982968139633116e-05, "loss": 0.754, "step": 733 }, { "epoch": 0.051671946497712076, "grad_norm": 2.803358793258667, "learning_rate": 1.3982856683924116e-05, "loss": 0.8331, "step": 734 }, { "epoch": 0.05174234424498416, "grad_norm": 2.683288097381592, "learning_rate": 1.3982744865170075e-05, "loss": 0.8174, "step": 735 }, { "epoch": 0.051812741992256245, "grad_norm": 2.5379276275634766, "learning_rate": 1.3982632683376805e-05, "loss": 0.9519, "step": 736 }, { "epoch": 0.051883139739528336, "grad_norm": 2.5294148921966553, "learning_rate": 1.3982520138550136e-05, "loss": 0.811, "step": 737 }, { "epoch": 0.05195353748680042, "grad_norm": 3.005319595336914, "learning_rate": 1.3982407230695923e-05, "loss": 0.7679, "step": 738 }, { "epoch": 0.05202393523407251, "grad_norm": 3.278287649154663, "learning_rate": 1.398229395982003e-05, "loss": 0.7206, "step": 739 }, { "epoch": 0.052094332981344596, "grad_norm": 2.724275588989258, "learning_rate": 1.3982180325928356e-05, "loss": 0.7438, "step": 740 }, { "epoch": 0.05216473072861669, "grad_norm": 2.333674907684326, "learning_rate": 1.39820663290268e-05, "loss": 0.7723, "step": 741 }, { "epoch": 0.05223512847588877, "grad_norm": 2.563398599624634, "learning_rate": 1.3981951969121293e-05, "loss": 0.8379, "step": 742 }, { "epoch": 0.052305526223160856, "grad_norm": 2.8131086826324463, "learning_rate": 1.398183724621778e-05, "loss": 0.762, "step": 743 }, { "epoch": 0.05237592397043295, "grad_norm": 3.1646552085876465, "learning_rate": 1.3981722160322228e-05, "loss": 0.7743, "step": 744 }, { "epoch": 0.05244632171770503, "grad_norm": 2.68544864654541, "learning_rate": 1.3981606711440614e-05, "loss": 0.7779, "step": 745 }, { "epoch": 0.05251671946497712, "grad_norm": 8.268613815307617, "learning_rate": 1.3981490899578947e-05, "loss": 0.7663, "step": 746 }, { "epoch": 0.05258711721224921, "grad_norm": 2.4277842044830322, "learning_rate": 1.3981374724743243e-05, "loss": 0.8292, "step": 747 }, { "epoch": 0.0526575149595213, "grad_norm": 2.7284724712371826, "learning_rate": 1.3981258186939546e-05, "loss": 0.8414, "step": 748 }, { "epoch": 0.05272791270679338, "grad_norm": 3.6003355979919434, "learning_rate": 1.3981141286173913e-05, "loss": 0.786, "step": 749 }, { "epoch": 0.05279831045406547, "grad_norm": 1.644735336303711, "learning_rate": 1.3981024022452419e-05, "loss": 0.9029, "step": 750 }, { "epoch": 0.05286870820133756, "grad_norm": 3.3569185733795166, "learning_rate": 1.3980906395781168e-05, "loss": 0.9093, "step": 751 }, { "epoch": 0.05293910594860964, "grad_norm": 3.2259087562561035, "learning_rate": 1.398078840616627e-05, "loss": 0.7531, "step": 752 }, { "epoch": 0.053009503695881734, "grad_norm": 2.7807788848876953, "learning_rate": 1.398067005361386e-05, "loss": 0.9643, "step": 753 }, { "epoch": 0.05307990144315382, "grad_norm": 3.459033250808716, "learning_rate": 1.3980551338130091e-05, "loss": 0.7065, "step": 754 }, { "epoch": 0.05315029919042591, "grad_norm": 3.129061698913574, "learning_rate": 1.3980432259721137e-05, "loss": 0.8084, "step": 755 }, { "epoch": 0.053220696937697994, "grad_norm": 2.747666358947754, "learning_rate": 1.3980312818393188e-05, "loss": 0.8485, "step": 756 }, { "epoch": 0.05329109468497008, "grad_norm": 2.5373542308807373, "learning_rate": 1.3980193014152455e-05, "loss": 0.9374, "step": 757 }, { "epoch": 0.05336149243224217, "grad_norm": 2.4282190799713135, "learning_rate": 1.3980072847005163e-05, "loss": 0.7267, "step": 758 }, { "epoch": 0.05343189017951425, "grad_norm": 2.6626877784729004, "learning_rate": 1.3979952316957564e-05, "loss": 0.8298, "step": 759 }, { "epoch": 0.053502287926786345, "grad_norm": 2.56164288520813, "learning_rate": 1.3979831424015923e-05, "loss": 1.0345, "step": 760 }, { "epoch": 0.05357268567405843, "grad_norm": 2.4681146144866943, "learning_rate": 1.3979710168186523e-05, "loss": 0.8936, "step": 761 }, { "epoch": 0.05364308342133052, "grad_norm": 3.349982738494873, "learning_rate": 1.3979588549475671e-05, "loss": 0.959, "step": 762 }, { "epoch": 0.053713481168602605, "grad_norm": 3.5909135341644287, "learning_rate": 1.397946656788969e-05, "loss": 0.9173, "step": 763 }, { "epoch": 0.05378387891587469, "grad_norm": 2.757384777069092, "learning_rate": 1.397934422343492e-05, "loss": 0.7083, "step": 764 }, { "epoch": 0.05385427666314678, "grad_norm": 2.7169158458709717, "learning_rate": 1.3979221516117723e-05, "loss": 0.7742, "step": 765 }, { "epoch": 0.053924674410418864, "grad_norm": 2.4077370166778564, "learning_rate": 1.397909844594448e-05, "loss": 0.7265, "step": 766 }, { "epoch": 0.053995072157690956, "grad_norm": 2.8874993324279785, "learning_rate": 1.3978975012921587e-05, "loss": 0.8733, "step": 767 }, { "epoch": 0.05406546990496304, "grad_norm": 3.2958602905273438, "learning_rate": 1.3978851217055463e-05, "loss": 0.8807, "step": 768 }, { "epoch": 0.05413586765223513, "grad_norm": 3.138025999069214, "learning_rate": 1.3978727058352545e-05, "loss": 0.7914, "step": 769 }, { "epoch": 0.054206265399507216, "grad_norm": 3.454031229019165, "learning_rate": 1.3978602536819286e-05, "loss": 0.9262, "step": 770 }, { "epoch": 0.0542766631467793, "grad_norm": 3.2560415267944336, "learning_rate": 1.3978477652462158e-05, "loss": 0.8609, "step": 771 }, { "epoch": 0.05434706089405139, "grad_norm": 2.585158348083496, "learning_rate": 1.3978352405287659e-05, "loss": 0.8444, "step": 772 }, { "epoch": 0.054417458641323475, "grad_norm": 2.9122416973114014, "learning_rate": 1.39782267953023e-05, "loss": 0.7026, "step": 773 }, { "epoch": 0.05448785638859557, "grad_norm": 2.9291064739227295, "learning_rate": 1.3978100822512608e-05, "loss": 0.7923, "step": 774 }, { "epoch": 0.05455825413586765, "grad_norm": 3.025038242340088, "learning_rate": 1.3977974486925136e-05, "loss": 0.8966, "step": 775 }, { "epoch": 0.05462865188313974, "grad_norm": 3.147413730621338, "learning_rate": 1.3977847788546451e-05, "loss": 0.8772, "step": 776 }, { "epoch": 0.054699049630411826, "grad_norm": 3.3828623294830322, "learning_rate": 1.397772072738314e-05, "loss": 0.8226, "step": 777 }, { "epoch": 0.05476944737768391, "grad_norm": 2.7313575744628906, "learning_rate": 1.3977593303441807e-05, "loss": 0.8902, "step": 778 }, { "epoch": 0.054839845124956, "grad_norm": 3.092282295227051, "learning_rate": 1.3977465516729079e-05, "loss": 0.753, "step": 779 }, { "epoch": 0.054910242872228086, "grad_norm": 2.471165657043457, "learning_rate": 1.39773373672516e-05, "loss": 0.7202, "step": 780 }, { "epoch": 0.05498064061950018, "grad_norm": 3.506514310836792, "learning_rate": 1.3977208855016035e-05, "loss": 0.9916, "step": 781 }, { "epoch": 0.05505103836677226, "grad_norm": 2.498566150665283, "learning_rate": 1.397707998002906e-05, "loss": 0.718, "step": 782 }, { "epoch": 0.05512143611404435, "grad_norm": 2.9316225051879883, "learning_rate": 1.397695074229738e-05, "loss": 0.6682, "step": 783 }, { "epoch": 0.05519183386131644, "grad_norm": 2.7276298999786377, "learning_rate": 1.3976821141827709e-05, "loss": 0.9103, "step": 784 }, { "epoch": 0.05526223160858852, "grad_norm": 3.407465934753418, "learning_rate": 1.397669117862679e-05, "loss": 0.6751, "step": 785 }, { "epoch": 0.05533262935586061, "grad_norm": 2.7540299892425537, "learning_rate": 1.3976560852701376e-05, "loss": 0.8264, "step": 786 }, { "epoch": 0.0554030271031327, "grad_norm": 2.8935627937316895, "learning_rate": 1.397643016405825e-05, "loss": 0.7803, "step": 787 }, { "epoch": 0.05547342485040479, "grad_norm": 2.9142093658447266, "learning_rate": 1.3976299112704197e-05, "loss": 0.809, "step": 788 }, { "epoch": 0.05554382259767687, "grad_norm": 2.8878235816955566, "learning_rate": 1.3976167698646038e-05, "loss": 0.8731, "step": 789 }, { "epoch": 0.055614220344948964, "grad_norm": 2.626134157180786, "learning_rate": 1.3976035921890599e-05, "loss": 0.8549, "step": 790 }, { "epoch": 0.05568461809222105, "grad_norm": 2.4057083129882812, "learning_rate": 1.3975903782444738e-05, "loss": 0.8847, "step": 791 }, { "epoch": 0.05575501583949313, "grad_norm": 2.7630984783172607, "learning_rate": 1.397577128031532e-05, "loss": 0.8382, "step": 792 }, { "epoch": 0.055825413586765224, "grad_norm": 2.34981107711792, "learning_rate": 1.3975638415509235e-05, "loss": 0.7721, "step": 793 }, { "epoch": 0.05589581133403731, "grad_norm": 2.826700210571289, "learning_rate": 1.3975505188033392e-05, "loss": 0.8582, "step": 794 }, { "epoch": 0.0559662090813094, "grad_norm": 2.648467540740967, "learning_rate": 1.3975371597894717e-05, "loss": 0.9139, "step": 795 }, { "epoch": 0.056036606828581484, "grad_norm": 3.2171995639801025, "learning_rate": 1.3975237645100155e-05, "loss": 0.8077, "step": 796 }, { "epoch": 0.056107004575853575, "grad_norm": 2.8273582458496094, "learning_rate": 1.397510332965667e-05, "loss": 0.8328, "step": 797 }, { "epoch": 0.05617740232312566, "grad_norm": 3.3063392639160156, "learning_rate": 1.3974968651571248e-05, "loss": 0.7679, "step": 798 }, { "epoch": 0.05624780007039775, "grad_norm": 2.952239751815796, "learning_rate": 1.3974833610850887e-05, "loss": 0.6983, "step": 799 }, { "epoch": 0.056318197817669835, "grad_norm": 2.5223381519317627, "learning_rate": 1.3974698207502611e-05, "loss": 0.8215, "step": 800 }, { "epoch": 0.05638859556494192, "grad_norm": 2.665891408920288, "learning_rate": 1.3974562441533458e-05, "loss": 0.7619, "step": 801 }, { "epoch": 0.05645899331221401, "grad_norm": 3.7186219692230225, "learning_rate": 1.3974426312950485e-05, "loss": 0.8978, "step": 802 }, { "epoch": 0.056529391059486095, "grad_norm": 2.9490206241607666, "learning_rate": 1.3974289821760775e-05, "loss": 0.8617, "step": 803 }, { "epoch": 0.056599788806758186, "grad_norm": 2.8732612133026123, "learning_rate": 1.3974152967971418e-05, "loss": 0.8459, "step": 804 }, { "epoch": 0.05667018655403027, "grad_norm": 2.5365946292877197, "learning_rate": 1.3974015751589534e-05, "loss": 0.7412, "step": 805 }, { "epoch": 0.05674058430130236, "grad_norm": 3.166045665740967, "learning_rate": 1.3973878172622255e-05, "loss": 0.8991, "step": 806 }, { "epoch": 0.056810982048574446, "grad_norm": 2.456942319869995, "learning_rate": 1.3973740231076735e-05, "loss": 0.7869, "step": 807 }, { "epoch": 0.05688137979584653, "grad_norm": 2.7324979305267334, "learning_rate": 1.397360192696014e-05, "loss": 0.7298, "step": 808 }, { "epoch": 0.05695177754311862, "grad_norm": 2.9507203102111816, "learning_rate": 1.397346326027967e-05, "loss": 0.8127, "step": 809 }, { "epoch": 0.057022175290390706, "grad_norm": 2.5356690883636475, "learning_rate": 1.3973324231042529e-05, "loss": 0.8101, "step": 810 }, { "epoch": 0.0570925730376628, "grad_norm": 2.731660842895508, "learning_rate": 1.3973184839255945e-05, "loss": 0.8733, "step": 811 }, { "epoch": 0.05716297078493488, "grad_norm": 2.4052608013153076, "learning_rate": 1.3973045084927163e-05, "loss": 0.8011, "step": 812 }, { "epoch": 0.05723336853220697, "grad_norm": 2.607543468475342, "learning_rate": 1.3972904968063455e-05, "loss": 0.8972, "step": 813 }, { "epoch": 0.05730376627947906, "grad_norm": 2.789609909057617, "learning_rate": 1.3972764488672102e-05, "loss": 0.8017, "step": 814 }, { "epoch": 0.05737416402675114, "grad_norm": 2.291703224182129, "learning_rate": 1.397262364676041e-05, "loss": 0.8318, "step": 815 }, { "epoch": 0.05744456177402323, "grad_norm": 2.660813570022583, "learning_rate": 1.3972482442335697e-05, "loss": 0.7756, "step": 816 }, { "epoch": 0.05751495952129532, "grad_norm": 2.9769861698150635, "learning_rate": 1.3972340875405308e-05, "loss": 0.798, "step": 817 }, { "epoch": 0.05758535726856741, "grad_norm": 2.650627374649048, "learning_rate": 1.3972198945976603e-05, "loss": 0.7363, "step": 818 }, { "epoch": 0.05765575501583949, "grad_norm": 2.342474937438965, "learning_rate": 1.3972056654056959e-05, "loss": 0.7422, "step": 819 }, { "epoch": 0.057726152763111584, "grad_norm": 3.2747371196746826, "learning_rate": 1.3971913999653776e-05, "loss": 0.7691, "step": 820 }, { "epoch": 0.05779655051038367, "grad_norm": 2.545027732849121, "learning_rate": 1.3971770982774472e-05, "loss": 0.8188, "step": 821 }, { "epoch": 0.05786694825765575, "grad_norm": 2.148822546005249, "learning_rate": 1.3971627603426477e-05, "loss": 0.8305, "step": 822 }, { "epoch": 0.057937346004927844, "grad_norm": 4.2569661140441895, "learning_rate": 1.3971483861617252e-05, "loss": 0.6319, "step": 823 }, { "epoch": 0.05800774375219993, "grad_norm": 2.091796875, "learning_rate": 1.3971339757354267e-05, "loss": 0.8086, "step": 824 }, { "epoch": 0.05807814149947202, "grad_norm": 3.198848247528076, "learning_rate": 1.3971195290645013e-05, "loss": 0.8679, "step": 825 }, { "epoch": 0.0581485392467441, "grad_norm": 2.404658555984497, "learning_rate": 1.3971050461497003e-05, "loss": 0.7893, "step": 826 }, { "epoch": 0.058218936994016195, "grad_norm": 2.7750935554504395, "learning_rate": 1.3970905269917766e-05, "loss": 0.7891, "step": 827 }, { "epoch": 0.05828933474128828, "grad_norm": 5.766189098358154, "learning_rate": 1.3970759715914852e-05, "loss": 0.8984, "step": 828 }, { "epoch": 0.05835973248856036, "grad_norm": 2.7002551555633545, "learning_rate": 1.3970613799495825e-05, "loss": 0.7683, "step": 829 }, { "epoch": 0.058430130235832455, "grad_norm": 2.4011354446411133, "learning_rate": 1.3970467520668275e-05, "loss": 0.7126, "step": 830 }, { "epoch": 0.05850052798310454, "grad_norm": 2.474435567855835, "learning_rate": 1.3970320879439807e-05, "loss": 0.7117, "step": 831 }, { "epoch": 0.05857092573037663, "grad_norm": 2.8345251083374023, "learning_rate": 1.397017387581804e-05, "loss": 0.6874, "step": 832 }, { "epoch": 0.058641323477648714, "grad_norm": 2.2286150455474854, "learning_rate": 1.3970026509810625e-05, "loss": 0.8337, "step": 833 }, { "epoch": 0.058711721224920806, "grad_norm": 2.337022066116333, "learning_rate": 1.3969878781425217e-05, "loss": 0.8015, "step": 834 }, { "epoch": 0.05878211897219289, "grad_norm": 2.785576343536377, "learning_rate": 1.3969730690669501e-05, "loss": 0.8072, "step": 835 }, { "epoch": 0.058852516719464974, "grad_norm": 2.2109322547912598, "learning_rate": 1.3969582237551171e-05, "loss": 0.7654, "step": 836 }, { "epoch": 0.058922914466737066, "grad_norm": 2.5643649101257324, "learning_rate": 1.396943342207795e-05, "loss": 0.7154, "step": 837 }, { "epoch": 0.05899331221400915, "grad_norm": 2.2703800201416016, "learning_rate": 1.3969284244257575e-05, "loss": 0.7333, "step": 838 }, { "epoch": 0.05906370996128124, "grad_norm": 2.5668768882751465, "learning_rate": 1.3969134704097797e-05, "loss": 0.8526, "step": 839 }, { "epoch": 0.059134107708553325, "grad_norm": 4.667150497436523, "learning_rate": 1.3968984801606395e-05, "loss": 0.8812, "step": 840 }, { "epoch": 0.05920450545582542, "grad_norm": 2.428640365600586, "learning_rate": 1.3968834536791164e-05, "loss": 0.7127, "step": 841 }, { "epoch": 0.0592749032030975, "grad_norm": 2.8358137607574463, "learning_rate": 1.3968683909659913e-05, "loss": 0.8006, "step": 842 }, { "epoch": 0.059345300950369585, "grad_norm": 2.904513359069824, "learning_rate": 1.3968532920220474e-05, "loss": 0.7269, "step": 843 }, { "epoch": 0.059415698697641676, "grad_norm": 2.365891933441162, "learning_rate": 1.3968381568480697e-05, "loss": 0.9274, "step": 844 }, { "epoch": 0.05948609644491376, "grad_norm": 2.6593594551086426, "learning_rate": 1.3968229854448452e-05, "loss": 0.6971, "step": 845 }, { "epoch": 0.05955649419218585, "grad_norm": 2.6830294132232666, "learning_rate": 1.3968077778131624e-05, "loss": 0.8179, "step": 846 }, { "epoch": 0.059626891939457936, "grad_norm": 2.8241679668426514, "learning_rate": 1.3967925339538126e-05, "loss": 0.8688, "step": 847 }, { "epoch": 0.05969728968673003, "grad_norm": 3.442892551422119, "learning_rate": 1.3967772538675875e-05, "loss": 0.8506, "step": 848 }, { "epoch": 0.05976768743400211, "grad_norm": 2.785004138946533, "learning_rate": 1.396761937555282e-05, "loss": 0.8459, "step": 849 }, { "epoch": 0.059838085181274196, "grad_norm": 2.54880428314209, "learning_rate": 1.3967465850176926e-05, "loss": 0.8746, "step": 850 }, { "epoch": 0.05990848292854629, "grad_norm": 2.7987897396087646, "learning_rate": 1.396731196255617e-05, "loss": 0.8557, "step": 851 }, { "epoch": 0.05997888067581837, "grad_norm": 3.092935085296631, "learning_rate": 1.3967157712698556e-05, "loss": 0.8721, "step": 852 }, { "epoch": 0.06004927842309046, "grad_norm": 2.317570686340332, "learning_rate": 1.3967003100612103e-05, "loss": 0.7147, "step": 853 }, { "epoch": 0.06011967617036255, "grad_norm": 2.254296064376831, "learning_rate": 1.3966848126304848e-05, "loss": 0.8274, "step": 854 }, { "epoch": 0.06019007391763464, "grad_norm": 2.559051513671875, "learning_rate": 1.3966692789784851e-05, "loss": 0.8253, "step": 855 }, { "epoch": 0.06026047166490672, "grad_norm": 3.6009714603424072, "learning_rate": 1.3966537091060184e-05, "loss": 0.8067, "step": 856 }, { "epoch": 0.06033086941217881, "grad_norm": 2.2450320720672607, "learning_rate": 1.3966381030138945e-05, "loss": 0.783, "step": 857 }, { "epoch": 0.0604012671594509, "grad_norm": 2.143674612045288, "learning_rate": 1.3966224607029247e-05, "loss": 0.8417, "step": 858 }, { "epoch": 0.06047166490672298, "grad_norm": 2.7825140953063965, "learning_rate": 1.3966067821739225e-05, "loss": 0.7448, "step": 859 }, { "epoch": 0.060542062653995074, "grad_norm": 2.6882572174072266, "learning_rate": 1.3965910674277027e-05, "loss": 0.8692, "step": 860 }, { "epoch": 0.06061246040126716, "grad_norm": 2.4077882766723633, "learning_rate": 1.3965753164650822e-05, "loss": 0.7442, "step": 861 }, { "epoch": 0.06068285814853925, "grad_norm": 2.590895652770996, "learning_rate": 1.3965595292868804e-05, "loss": 0.7469, "step": 862 }, { "epoch": 0.060753255895811334, "grad_norm": 2.3315577507019043, "learning_rate": 1.3965437058939178e-05, "loss": 0.8921, "step": 863 }, { "epoch": 0.06082365364308342, "grad_norm": 2.2158522605895996, "learning_rate": 1.3965278462870171e-05, "loss": 0.8957, "step": 864 }, { "epoch": 0.06089405139035551, "grad_norm": 2.5753707885742188, "learning_rate": 1.3965119504670028e-05, "loss": 0.8768, "step": 865 }, { "epoch": 0.060964449137627594, "grad_norm": 2.806683301925659, "learning_rate": 1.3964960184347014e-05, "loss": 0.714, "step": 866 }, { "epoch": 0.061034846884899685, "grad_norm": 2.431647539138794, "learning_rate": 1.3964800501909412e-05, "loss": 0.7876, "step": 867 }, { "epoch": 0.06110524463217177, "grad_norm": 2.6119256019592285, "learning_rate": 1.3964640457365526e-05, "loss": 0.6665, "step": 868 }, { "epoch": 0.06117564237944386, "grad_norm": 2.294191598892212, "learning_rate": 1.3964480050723674e-05, "loss": 0.8057, "step": 869 }, { "epoch": 0.061246040126715945, "grad_norm": 2.7430543899536133, "learning_rate": 1.3964319281992194e-05, "loss": 0.7695, "step": 870 }, { "epoch": 0.06131643787398803, "grad_norm": 2.3632636070251465, "learning_rate": 1.3964158151179451e-05, "loss": 0.7169, "step": 871 }, { "epoch": 0.06138683562126012, "grad_norm": 2.2400379180908203, "learning_rate": 1.3963996658293816e-05, "loss": 0.811, "step": 872 }, { "epoch": 0.061457233368532205, "grad_norm": 2.3603732585906982, "learning_rate": 1.396383480334369e-05, "loss": 0.8874, "step": 873 }, { "epoch": 0.061527631115804296, "grad_norm": 2.2423322200775146, "learning_rate": 1.3963672586337485e-05, "loss": 0.9054, "step": 874 }, { "epoch": 0.06159802886307638, "grad_norm": 2.2417402267456055, "learning_rate": 1.3963510007283636e-05, "loss": 0.7709, "step": 875 }, { "epoch": 0.06166842661034847, "grad_norm": 2.196552276611328, "learning_rate": 1.3963347066190593e-05, "loss": 0.8201, "step": 876 }, { "epoch": 0.061738824357620556, "grad_norm": 3.877890110015869, "learning_rate": 1.3963183763066833e-05, "loss": 0.7808, "step": 877 }, { "epoch": 0.06180922210489264, "grad_norm": 2.4083194732666016, "learning_rate": 1.3963020097920842e-05, "loss": 0.7178, "step": 878 }, { "epoch": 0.06187961985216473, "grad_norm": 2.343318223953247, "learning_rate": 1.3962856070761128e-05, "loss": 0.9155, "step": 879 }, { "epoch": 0.061950017599436816, "grad_norm": 3.645878791809082, "learning_rate": 1.3962691681596223e-05, "loss": 0.772, "step": 880 }, { "epoch": 0.06202041534670891, "grad_norm": 2.770340919494629, "learning_rate": 1.396252693043467e-05, "loss": 0.7787, "step": 881 }, { "epoch": 0.06209081309398099, "grad_norm": 2.5928235054016113, "learning_rate": 1.3962361817285039e-05, "loss": 0.7318, "step": 882 }, { "epoch": 0.06216121084125308, "grad_norm": 3.6422841548919678, "learning_rate": 1.3962196342155907e-05, "loss": 0.7569, "step": 883 }, { "epoch": 0.06223160858852517, "grad_norm": 2.1934292316436768, "learning_rate": 1.3962030505055887e-05, "loss": 0.7066, "step": 884 }, { "epoch": 0.06230200633579725, "grad_norm": 2.57673716545105, "learning_rate": 1.3961864305993592e-05, "loss": 0.8142, "step": 885 }, { "epoch": 0.06237240408306934, "grad_norm": 2.441199541091919, "learning_rate": 1.3961697744977668e-05, "loss": 0.8148, "step": 886 }, { "epoch": 0.06244280183034143, "grad_norm": 2.782151699066162, "learning_rate": 1.3961530822016773e-05, "loss": 0.8528, "step": 887 }, { "epoch": 0.06251319957761352, "grad_norm": 2.274099588394165, "learning_rate": 1.3961363537119586e-05, "loss": 0.7547, "step": 888 }, { "epoch": 0.06258359732488561, "grad_norm": 2.8162405490875244, "learning_rate": 1.3961195890294807e-05, "loss": 0.7486, "step": 889 }, { "epoch": 0.06265399507215769, "grad_norm": 2.324718713760376, "learning_rate": 1.3961027881551145e-05, "loss": 0.8116, "step": 890 }, { "epoch": 0.06272439281942978, "grad_norm": 2.2138397693634033, "learning_rate": 1.3960859510897341e-05, "loss": 0.9492, "step": 891 }, { "epoch": 0.06279479056670187, "grad_norm": 2.5554590225219727, "learning_rate": 1.3960690778342147e-05, "loss": 0.8364, "step": 892 }, { "epoch": 0.06286518831397395, "grad_norm": 2.6081950664520264, "learning_rate": 1.3960521683894334e-05, "loss": 0.8786, "step": 893 }, { "epoch": 0.06293558606124604, "grad_norm": 2.758977174758911, "learning_rate": 1.3960352227562696e-05, "loss": 0.7312, "step": 894 }, { "epoch": 0.06300598380851813, "grad_norm": 2.5388519763946533, "learning_rate": 1.3960182409356042e-05, "loss": 0.9258, "step": 895 }, { "epoch": 0.06307638155579022, "grad_norm": 2.8218441009521484, "learning_rate": 1.3960012229283202e-05, "loss": 0.8577, "step": 896 }, { "epoch": 0.0631467793030623, "grad_norm": 2.5186567306518555, "learning_rate": 1.395984168735302e-05, "loss": 0.6195, "step": 897 }, { "epoch": 0.06321717705033439, "grad_norm": 2.2771897315979004, "learning_rate": 1.395967078357437e-05, "loss": 0.7535, "step": 898 }, { "epoch": 0.06328757479760648, "grad_norm": 2.7869532108306885, "learning_rate": 1.3959499517956129e-05, "loss": 0.8432, "step": 899 }, { "epoch": 0.06335797254487856, "grad_norm": 2.3160929679870605, "learning_rate": 1.3959327890507207e-05, "loss": 0.8497, "step": 900 }, { "epoch": 0.06342837029215065, "grad_norm": 2.9046618938446045, "learning_rate": 1.3959155901236526e-05, "loss": 0.8242, "step": 901 }, { "epoch": 0.06349876803942274, "grad_norm": 2.5057413578033447, "learning_rate": 1.3958983550153024e-05, "loss": 0.8951, "step": 902 }, { "epoch": 0.06356916578669483, "grad_norm": 2.5708539485931396, "learning_rate": 1.3958810837265666e-05, "loss": 0.7396, "step": 903 }, { "epoch": 0.06363956353396691, "grad_norm": 2.965043306350708, "learning_rate": 1.3958637762583432e-05, "loss": 0.8626, "step": 904 }, { "epoch": 0.063709961281239, "grad_norm": 2.5182418823242188, "learning_rate": 1.3958464326115317e-05, "loss": 0.8784, "step": 905 }, { "epoch": 0.06378035902851109, "grad_norm": 2.505825996398926, "learning_rate": 1.395829052787034e-05, "loss": 0.8719, "step": 906 }, { "epoch": 0.06385075677578317, "grad_norm": 2.88649320602417, "learning_rate": 1.3958116367857537e-05, "loss": 0.8756, "step": 907 }, { "epoch": 0.06392115452305526, "grad_norm": 2.516996383666992, "learning_rate": 1.3957941846085964e-05, "loss": 0.8033, "step": 908 }, { "epoch": 0.06399155227032735, "grad_norm": 2.2492072582244873, "learning_rate": 1.395776696256469e-05, "loss": 0.555, "step": 909 }, { "epoch": 0.06406195001759944, "grad_norm": 2.584751844406128, "learning_rate": 1.3957591717302811e-05, "loss": 0.7872, "step": 910 }, { "epoch": 0.06413234776487152, "grad_norm": 2.3776657581329346, "learning_rate": 1.3957416110309438e-05, "loss": 0.7906, "step": 911 }, { "epoch": 0.06420274551214361, "grad_norm": 2.716198682785034, "learning_rate": 1.39572401415937e-05, "loss": 0.9037, "step": 912 }, { "epoch": 0.0642731432594157, "grad_norm": 2.3903534412384033, "learning_rate": 1.3957063811164747e-05, "loss": 0.7966, "step": 913 }, { "epoch": 0.06434354100668778, "grad_norm": 3.060811996459961, "learning_rate": 1.3956887119031746e-05, "loss": 0.862, "step": 914 }, { "epoch": 0.06441393875395987, "grad_norm": 2.5334577560424805, "learning_rate": 1.3956710065203883e-05, "loss": 0.8558, "step": 915 }, { "epoch": 0.06448433650123196, "grad_norm": 3.0273704528808594, "learning_rate": 1.3956532649690363e-05, "loss": 0.7837, "step": 916 }, { "epoch": 0.06455473424850405, "grad_norm": 2.472519636154175, "learning_rate": 1.3956354872500409e-05, "loss": 0.758, "step": 917 }, { "epoch": 0.06462513199577613, "grad_norm": 2.5911266803741455, "learning_rate": 1.3956176733643265e-05, "loss": 0.8698, "step": 918 }, { "epoch": 0.06469552974304822, "grad_norm": 3.0595457553863525, "learning_rate": 1.3955998233128195e-05, "loss": 0.8511, "step": 919 }, { "epoch": 0.06476592749032031, "grad_norm": 3.6815438270568848, "learning_rate": 1.3955819370964476e-05, "loss": 0.8021, "step": 920 }, { "epoch": 0.06483632523759239, "grad_norm": 2.643683433532715, "learning_rate": 1.3955640147161409e-05, "loss": 0.8538, "step": 921 }, { "epoch": 0.06490672298486448, "grad_norm": 2.6022913455963135, "learning_rate": 1.395546056172831e-05, "loss": 0.8202, "step": 922 }, { "epoch": 0.06497712073213657, "grad_norm": 2.823735475540161, "learning_rate": 1.3955280614674517e-05, "loss": 0.7663, "step": 923 }, { "epoch": 0.06504751847940866, "grad_norm": 2.8828020095825195, "learning_rate": 1.3955100306009388e-05, "loss": 0.7653, "step": 924 }, { "epoch": 0.06511791622668074, "grad_norm": 2.5093917846679688, "learning_rate": 1.3954919635742294e-05, "loss": 0.8627, "step": 925 }, { "epoch": 0.06518831397395283, "grad_norm": 2.5644984245300293, "learning_rate": 1.3954738603882629e-05, "loss": 0.8325, "step": 926 }, { "epoch": 0.06525871172122492, "grad_norm": 2.372300624847412, "learning_rate": 1.3954557210439807e-05, "loss": 0.7594, "step": 927 }, { "epoch": 0.065329109468497, "grad_norm": 2.7516050338745117, "learning_rate": 1.3954375455423254e-05, "loss": 0.7429, "step": 928 }, { "epoch": 0.06539950721576909, "grad_norm": 2.5907065868377686, "learning_rate": 1.3954193338842425e-05, "loss": 0.7783, "step": 929 }, { "epoch": 0.06546990496304118, "grad_norm": 2.336833953857422, "learning_rate": 1.3954010860706786e-05, "loss": 0.9542, "step": 930 }, { "epoch": 0.06554030271031328, "grad_norm": 2.9466209411621094, "learning_rate": 1.3953828021025824e-05, "loss": 0.7254, "step": 931 }, { "epoch": 0.06561070045758535, "grad_norm": 2.976130723953247, "learning_rate": 1.3953644819809048e-05, "loss": 0.8004, "step": 932 }, { "epoch": 0.06568109820485744, "grad_norm": 2.1289875507354736, "learning_rate": 1.3953461257065976e-05, "loss": 0.8877, "step": 933 }, { "epoch": 0.06575149595212954, "grad_norm": 2.133739709854126, "learning_rate": 1.395327733280616e-05, "loss": 0.6603, "step": 934 }, { "epoch": 0.06582189369940161, "grad_norm": 2.2336537837982178, "learning_rate": 1.3953093047039154e-05, "loss": 0.802, "step": 935 }, { "epoch": 0.0658922914466737, "grad_norm": 2.4950695037841797, "learning_rate": 1.3952908399774547e-05, "loss": 0.8386, "step": 936 }, { "epoch": 0.0659626891939458, "grad_norm": 2.5089645385742188, "learning_rate": 1.3952723391021932e-05, "loss": 0.7866, "step": 937 }, { "epoch": 0.06603308694121789, "grad_norm": 2.930619716644287, "learning_rate": 1.3952538020790935e-05, "loss": 0.8368, "step": 938 }, { "epoch": 0.06610348468848996, "grad_norm": 2.3006300926208496, "learning_rate": 1.3952352289091189e-05, "loss": 0.914, "step": 939 }, { "epoch": 0.06617388243576205, "grad_norm": 2.725554943084717, "learning_rate": 1.3952166195932348e-05, "loss": 0.7082, "step": 940 }, { "epoch": 0.06624428018303415, "grad_norm": 2.626526355743408, "learning_rate": 1.3951979741324091e-05, "loss": 0.8396, "step": 941 }, { "epoch": 0.06631467793030622, "grad_norm": 2.8915317058563232, "learning_rate": 1.3951792925276112e-05, "loss": 0.7729, "step": 942 }, { "epoch": 0.06638507567757831, "grad_norm": 2.6034092903137207, "learning_rate": 1.3951605747798124e-05, "loss": 0.8324, "step": 943 }, { "epoch": 0.0664554734248504, "grad_norm": 2.2596564292907715, "learning_rate": 1.3951418208899856e-05, "loss": 0.9193, "step": 944 }, { "epoch": 0.0665258711721225, "grad_norm": 3.0044050216674805, "learning_rate": 1.3951230308591062e-05, "loss": 0.8466, "step": 945 }, { "epoch": 0.06659626891939457, "grad_norm": 3.2138712406158447, "learning_rate": 1.3951042046881506e-05, "loss": 0.8262, "step": 946 }, { "epoch": 0.06666666666666667, "grad_norm": 2.708765983581543, "learning_rate": 1.395085342378098e-05, "loss": 0.7262, "step": 947 }, { "epoch": 0.06673706441393876, "grad_norm": 2.777843713760376, "learning_rate": 1.395066443929929e-05, "loss": 0.747, "step": 948 }, { "epoch": 0.06680746216121083, "grad_norm": 2.318438768386841, "learning_rate": 1.3950475093446258e-05, "loss": 0.6696, "step": 949 }, { "epoch": 0.06687785990848293, "grad_norm": 2.8077874183654785, "learning_rate": 1.3950285386231735e-05, "loss": 0.8441, "step": 950 }, { "epoch": 0.06694825765575502, "grad_norm": 2.369778871536255, "learning_rate": 1.395009531766558e-05, "loss": 0.8648, "step": 951 }, { "epoch": 0.06701865540302711, "grad_norm": 3.0671184062957764, "learning_rate": 1.3949904887757672e-05, "loss": 0.7907, "step": 952 }, { "epoch": 0.06708905315029919, "grad_norm": 2.6719393730163574, "learning_rate": 1.3949714096517917e-05, "loss": 0.7698, "step": 953 }, { "epoch": 0.06715945089757128, "grad_norm": 2.499262809753418, "learning_rate": 1.394952294395623e-05, "loss": 0.6852, "step": 954 }, { "epoch": 0.06722984864484337, "grad_norm": 3.1569724082946777, "learning_rate": 1.3949331430082553e-05, "loss": 0.8786, "step": 955 }, { "epoch": 0.06730024639211546, "grad_norm": 2.5234477519989014, "learning_rate": 1.394913955490684e-05, "loss": 0.7482, "step": 956 }, { "epoch": 0.06737064413938754, "grad_norm": 2.2858223915100098, "learning_rate": 1.394894731843907e-05, "loss": 0.8151, "step": 957 }, { "epoch": 0.06744104188665963, "grad_norm": 2.7205429077148438, "learning_rate": 1.3948754720689235e-05, "loss": 0.7942, "step": 958 }, { "epoch": 0.06751143963393172, "grad_norm": 3.3332679271698, "learning_rate": 1.3948561761667348e-05, "loss": 0.8172, "step": 959 }, { "epoch": 0.0675818373812038, "grad_norm": 2.451005458831787, "learning_rate": 1.3948368441383442e-05, "loss": 0.8328, "step": 960 }, { "epoch": 0.06765223512847589, "grad_norm": 2.8664941787719727, "learning_rate": 1.394817475984757e-05, "loss": 0.9442, "step": 961 }, { "epoch": 0.06772263287574798, "grad_norm": 2.4130663871765137, "learning_rate": 1.3947980717069798e-05, "loss": 0.7826, "step": 962 }, { "epoch": 0.06779303062302007, "grad_norm": 2.7792553901672363, "learning_rate": 1.3947786313060217e-05, "loss": 0.8956, "step": 963 }, { "epoch": 0.06786342837029215, "grad_norm": 2.4310874938964844, "learning_rate": 1.394759154782893e-05, "loss": 0.8183, "step": 964 }, { "epoch": 0.06793382611756424, "grad_norm": 2.2132728099823, "learning_rate": 1.394739642138607e-05, "loss": 0.8695, "step": 965 }, { "epoch": 0.06800422386483633, "grad_norm": 2.590578556060791, "learning_rate": 1.3947200933741775e-05, "loss": 0.8263, "step": 966 }, { "epoch": 0.06807462161210841, "grad_norm": 3.2390189170837402, "learning_rate": 1.3947005084906214e-05, "loss": 0.7805, "step": 967 }, { "epoch": 0.0681450193593805, "grad_norm": 2.4756510257720947, "learning_rate": 1.3946808874889567e-05, "loss": 0.7554, "step": 968 }, { "epoch": 0.06821541710665259, "grad_norm": 2.925624370574951, "learning_rate": 1.3946612303702034e-05, "loss": 0.76, "step": 969 }, { "epoch": 0.06828581485392468, "grad_norm": 2.5858829021453857, "learning_rate": 1.3946415371353837e-05, "loss": 0.8139, "step": 970 }, { "epoch": 0.06835621260119676, "grad_norm": 2.9992947578430176, "learning_rate": 1.3946218077855214e-05, "loss": 0.7655, "step": 971 }, { "epoch": 0.06842661034846885, "grad_norm": 2.6279289722442627, "learning_rate": 1.3946020423216422e-05, "loss": 0.8007, "step": 972 }, { "epoch": 0.06849700809574094, "grad_norm": 3.1828341484069824, "learning_rate": 1.3945822407447737e-05, "loss": 0.8132, "step": 973 }, { "epoch": 0.06856740584301302, "grad_norm": 2.6185081005096436, "learning_rate": 1.3945624030559456e-05, "loss": 0.7004, "step": 974 }, { "epoch": 0.06863780359028511, "grad_norm": 2.731748342514038, "learning_rate": 1.3945425292561889e-05, "loss": 0.6964, "step": 975 }, { "epoch": 0.0687082013375572, "grad_norm": 2.537097454071045, "learning_rate": 1.3945226193465372e-05, "loss": 0.8247, "step": 976 }, { "epoch": 0.06877859908482929, "grad_norm": 2.258087158203125, "learning_rate": 1.3945026733280255e-05, "loss": 0.6482, "step": 977 }, { "epoch": 0.06884899683210137, "grad_norm": 2.617506265640259, "learning_rate": 1.394482691201691e-05, "loss": 0.7944, "step": 978 }, { "epoch": 0.06891939457937346, "grad_norm": 2.6988179683685303, "learning_rate": 1.3944626729685724e-05, "loss": 0.7084, "step": 979 }, { "epoch": 0.06898979232664555, "grad_norm": 2.727134943008423, "learning_rate": 1.3944426186297103e-05, "loss": 0.8194, "step": 980 }, { "epoch": 0.06906019007391763, "grad_norm": 2.4431238174438477, "learning_rate": 1.3944225281861477e-05, "loss": 0.8124, "step": 981 }, { "epoch": 0.06913058782118972, "grad_norm": 2.46311092376709, "learning_rate": 1.3944024016389288e-05, "loss": 0.9192, "step": 982 }, { "epoch": 0.06920098556846181, "grad_norm": 2.7169837951660156, "learning_rate": 1.3943822389891002e-05, "loss": 0.841, "step": 983 }, { "epoch": 0.0692713833157339, "grad_norm": 2.5774283409118652, "learning_rate": 1.39436204023771e-05, "loss": 0.8443, "step": 984 }, { "epoch": 0.06934178106300598, "grad_norm": 2.657177448272705, "learning_rate": 1.3943418053858087e-05, "loss": 0.755, "step": 985 }, { "epoch": 0.06941217881027807, "grad_norm": 2.262249708175659, "learning_rate": 1.3943215344344482e-05, "loss": 0.8641, "step": 986 }, { "epoch": 0.06948257655755016, "grad_norm": 2.59678053855896, "learning_rate": 1.394301227384682e-05, "loss": 0.7825, "step": 987 }, { "epoch": 0.06955297430482224, "grad_norm": 4.437134265899658, "learning_rate": 1.3942808842375664e-05, "loss": 0.8252, "step": 988 }, { "epoch": 0.06962337205209433, "grad_norm": 2.383888006210327, "learning_rate": 1.3942605049941587e-05, "loss": 0.7879, "step": 989 }, { "epoch": 0.06969376979936642, "grad_norm": 2.4319252967834473, "learning_rate": 1.3942400896555186e-05, "loss": 0.9268, "step": 990 }, { "epoch": 0.06976416754663851, "grad_norm": 2.6267049312591553, "learning_rate": 1.3942196382227078e-05, "loss": 0.8338, "step": 991 }, { "epoch": 0.06983456529391059, "grad_norm": 2.6093499660491943, "learning_rate": 1.3941991506967891e-05, "loss": 0.7347, "step": 992 }, { "epoch": 0.06990496304118268, "grad_norm": 2.0639071464538574, "learning_rate": 1.3941786270788276e-05, "loss": 0.7907, "step": 993 }, { "epoch": 0.06997536078845477, "grad_norm": 3.0175085067749023, "learning_rate": 1.3941580673698909e-05, "loss": 0.7355, "step": 994 }, { "epoch": 0.07004575853572685, "grad_norm": 2.684511423110962, "learning_rate": 1.3941374715710476e-05, "loss": 0.6537, "step": 995 }, { "epoch": 0.07011615628299894, "grad_norm": 2.2390151023864746, "learning_rate": 1.3941168396833684e-05, "loss": 0.698, "step": 996 }, { "epoch": 0.07018655403027103, "grad_norm": 2.383263111114502, "learning_rate": 1.3940961717079261e-05, "loss": 0.7268, "step": 997 }, { "epoch": 0.07025695177754313, "grad_norm": 2.4326331615448, "learning_rate": 1.394075467645795e-05, "loss": 0.7216, "step": 998 }, { "epoch": 0.0703273495248152, "grad_norm": 2.4525201320648193, "learning_rate": 1.3940547274980522e-05, "loss": 0.7209, "step": 999 }, { "epoch": 0.0703977472720873, "grad_norm": 2.4150009155273438, "learning_rate": 1.3940339512657751e-05, "loss": 0.7812, "step": 1000 }, { "epoch": 0.07046814501935939, "grad_norm": 2.7470998764038086, "learning_rate": 1.3940131389500446e-05, "loss": 0.9454, "step": 1001 }, { "epoch": 0.07053854276663146, "grad_norm": 2.7927896976470947, "learning_rate": 1.3939922905519424e-05, "loss": 0.852, "step": 1002 }, { "epoch": 0.07060894051390355, "grad_norm": 2.1819887161254883, "learning_rate": 1.3939714060725525e-05, "loss": 0.7457, "step": 1003 }, { "epoch": 0.07067933826117564, "grad_norm": 2.189953327178955, "learning_rate": 1.3939504855129605e-05, "loss": 0.7914, "step": 1004 }, { "epoch": 0.07074973600844774, "grad_norm": 2.948251485824585, "learning_rate": 1.3939295288742546e-05, "loss": 0.7367, "step": 1005 }, { "epoch": 0.07082013375571981, "grad_norm": 2.8013503551483154, "learning_rate": 1.393908536157524e-05, "loss": 0.8736, "step": 1006 }, { "epoch": 0.0708905315029919, "grad_norm": 2.2470145225524902, "learning_rate": 1.39388750736386e-05, "loss": 0.8063, "step": 1007 }, { "epoch": 0.070960929250264, "grad_norm": 2.463219165802002, "learning_rate": 1.393866442494356e-05, "loss": 0.814, "step": 1008 }, { "epoch": 0.07103132699753607, "grad_norm": 2.3860814571380615, "learning_rate": 1.3938453415501076e-05, "loss": 0.8458, "step": 1009 }, { "epoch": 0.07110172474480816, "grad_norm": 2.907344102859497, "learning_rate": 1.3938242045322112e-05, "loss": 0.8431, "step": 1010 }, { "epoch": 0.07117212249208026, "grad_norm": 1.891729712486267, "learning_rate": 1.3938030314417662e-05, "loss": 0.9015, "step": 1011 }, { "epoch": 0.07124252023935235, "grad_norm": 2.1517698764801025, "learning_rate": 1.3937818222798732e-05, "loss": 0.823, "step": 1012 }, { "epoch": 0.07131291798662442, "grad_norm": 2.2781972885131836, "learning_rate": 1.393760577047635e-05, "loss": 0.7062, "step": 1013 }, { "epoch": 0.07138331573389652, "grad_norm": 2.3424575328826904, "learning_rate": 1.3937392957461559e-05, "loss": 0.7659, "step": 1014 }, { "epoch": 0.07145371348116861, "grad_norm": 2.90763783454895, "learning_rate": 1.3937179783765427e-05, "loss": 0.665, "step": 1015 }, { "epoch": 0.07152411122844068, "grad_norm": 2.2444393634796143, "learning_rate": 1.3936966249399035e-05, "loss": 0.9269, "step": 1016 }, { "epoch": 0.07159450897571278, "grad_norm": 2.8298094272613525, "learning_rate": 1.3936752354373485e-05, "loss": 0.8599, "step": 1017 }, { "epoch": 0.07166490672298487, "grad_norm": 2.283485174179077, "learning_rate": 1.3936538098699898e-05, "loss": 0.9594, "step": 1018 }, { "epoch": 0.07173530447025696, "grad_norm": 2.510566473007202, "learning_rate": 1.3936323482389413e-05, "loss": 0.7105, "step": 1019 }, { "epoch": 0.07180570221752904, "grad_norm": 2.8578338623046875, "learning_rate": 1.3936108505453188e-05, "loss": 0.8545, "step": 1020 }, { "epoch": 0.07187609996480113, "grad_norm": 2.3185393810272217, "learning_rate": 1.39358931679024e-05, "loss": 0.7891, "step": 1021 }, { "epoch": 0.07194649771207322, "grad_norm": 2.1223130226135254, "learning_rate": 1.3935677469748245e-05, "loss": 0.7416, "step": 1022 }, { "epoch": 0.0720168954593453, "grad_norm": 3.4597904682159424, "learning_rate": 1.3935461411001937e-05, "loss": 0.7913, "step": 1023 }, { "epoch": 0.07208729320661739, "grad_norm": 2.9353513717651367, "learning_rate": 1.3935244991674708e-05, "loss": 0.7688, "step": 1024 }, { "epoch": 0.07215769095388948, "grad_norm": 2.6923165321350098, "learning_rate": 1.3935028211777813e-05, "loss": 0.8269, "step": 1025 }, { "epoch": 0.07222808870116157, "grad_norm": 2.4170939922332764, "learning_rate": 1.393481107132252e-05, "loss": 0.867, "step": 1026 }, { "epoch": 0.07229848644843365, "grad_norm": 2.5160932540893555, "learning_rate": 1.3934593570320117e-05, "loss": 0.8418, "step": 1027 }, { "epoch": 0.07236888419570574, "grad_norm": 2.5388927459716797, "learning_rate": 1.3934375708781917e-05, "loss": 0.8047, "step": 1028 }, { "epoch": 0.07243928194297783, "grad_norm": 2.607654094696045, "learning_rate": 1.3934157486719242e-05, "loss": 0.7792, "step": 1029 }, { "epoch": 0.0725096796902499, "grad_norm": 2.4479799270629883, "learning_rate": 1.393393890414344e-05, "loss": 0.7874, "step": 1030 }, { "epoch": 0.072580077437522, "grad_norm": 2.4575772285461426, "learning_rate": 1.3933719961065873e-05, "loss": 0.8379, "step": 1031 }, { "epoch": 0.07265047518479409, "grad_norm": 2.1588051319122314, "learning_rate": 1.3933500657497927e-05, "loss": 0.7105, "step": 1032 }, { "epoch": 0.07272087293206618, "grad_norm": 2.3705193996429443, "learning_rate": 1.3933280993451003e-05, "loss": 0.8423, "step": 1033 }, { "epoch": 0.07279127067933826, "grad_norm": 2.5429670810699463, "learning_rate": 1.393306096893652e-05, "loss": 0.8712, "step": 1034 }, { "epoch": 0.07286166842661035, "grad_norm": 2.8496360778808594, "learning_rate": 1.3932840583965918e-05, "loss": 0.7684, "step": 1035 }, { "epoch": 0.07293206617388244, "grad_norm": 3.180054187774658, "learning_rate": 1.3932619838550656e-05, "loss": 0.7796, "step": 1036 }, { "epoch": 0.07300246392115452, "grad_norm": 2.1675944328308105, "learning_rate": 1.393239873270221e-05, "loss": 0.8011, "step": 1037 }, { "epoch": 0.07307286166842661, "grad_norm": 2.1751883029937744, "learning_rate": 1.3932177266432075e-05, "loss": 0.7815, "step": 1038 }, { "epoch": 0.0731432594156987, "grad_norm": 2.3250463008880615, "learning_rate": 1.3931955439751768e-05, "loss": 0.7085, "step": 1039 }, { "epoch": 0.07321365716297079, "grad_norm": 2.6485543251037598, "learning_rate": 1.3931733252672818e-05, "loss": 0.8421, "step": 1040 }, { "epoch": 0.07328405491024287, "grad_norm": 2.3346197605133057, "learning_rate": 1.3931510705206778e-05, "loss": 0.7984, "step": 1041 }, { "epoch": 0.07335445265751496, "grad_norm": 2.98880672454834, "learning_rate": 1.393128779736522e-05, "loss": 0.8117, "step": 1042 }, { "epoch": 0.07342485040478705, "grad_norm": 2.919203042984009, "learning_rate": 1.393106452915973e-05, "loss": 0.9268, "step": 1043 }, { "epoch": 0.07349524815205913, "grad_norm": 3.87673020362854, "learning_rate": 1.393084090060192e-05, "loss": 0.8939, "step": 1044 }, { "epoch": 0.07356564589933122, "grad_norm": 2.200892448425293, "learning_rate": 1.3930616911703412e-05, "loss": 0.7958, "step": 1045 }, { "epoch": 0.07363604364660331, "grad_norm": 2.1954853534698486, "learning_rate": 1.3930392562475857e-05, "loss": 0.8652, "step": 1046 }, { "epoch": 0.0737064413938754, "grad_norm": 2.35556960105896, "learning_rate": 1.3930167852930916e-05, "loss": 0.7573, "step": 1047 }, { "epoch": 0.07377683914114748, "grad_norm": 2.7508351802825928, "learning_rate": 1.3929942783080271e-05, "loss": 0.8458, "step": 1048 }, { "epoch": 0.07384723688841957, "grad_norm": 2.088918685913086, "learning_rate": 1.3929717352935623e-05, "loss": 0.644, "step": 1049 }, { "epoch": 0.07391763463569166, "grad_norm": 2.0359601974487305, "learning_rate": 1.3929491562508697e-05, "loss": 0.7599, "step": 1050 }, { "epoch": 0.07398803238296374, "grad_norm": 2.542320728302002, "learning_rate": 1.3929265411811227e-05, "loss": 0.8338, "step": 1051 }, { "epoch": 0.07405843013023583, "grad_norm": 2.520738363265991, "learning_rate": 1.3929038900854975e-05, "loss": 0.8378, "step": 1052 }, { "epoch": 0.07412882787750792, "grad_norm": 2.5593581199645996, "learning_rate": 1.3928812029651712e-05, "loss": 0.6889, "step": 1053 }, { "epoch": 0.07419922562478001, "grad_norm": 2.6799166202545166, "learning_rate": 1.3928584798213238e-05, "loss": 0.8063, "step": 1054 }, { "epoch": 0.07426962337205209, "grad_norm": 2.3922972679138184, "learning_rate": 1.3928357206551365e-05, "loss": 0.8827, "step": 1055 }, { "epoch": 0.07434002111932418, "grad_norm": 2.9360947608947754, "learning_rate": 1.3928129254677927e-05, "loss": 0.8623, "step": 1056 }, { "epoch": 0.07441041886659627, "grad_norm": 2.47277569770813, "learning_rate": 1.3927900942604773e-05, "loss": 0.866, "step": 1057 }, { "epoch": 0.07448081661386835, "grad_norm": 2.4367527961730957, "learning_rate": 1.3927672270343776e-05, "loss": 0.7611, "step": 1058 }, { "epoch": 0.07455121436114044, "grad_norm": 2.37290096282959, "learning_rate": 1.3927443237906826e-05, "loss": 0.7717, "step": 1059 }, { "epoch": 0.07462161210841253, "grad_norm": 2.6298134326934814, "learning_rate": 1.3927213845305825e-05, "loss": 0.8921, "step": 1060 }, { "epoch": 0.07469200985568462, "grad_norm": 3.458796977996826, "learning_rate": 1.3926984092552704e-05, "loss": 0.7997, "step": 1061 }, { "epoch": 0.0747624076029567, "grad_norm": 2.196821451187134, "learning_rate": 1.3926753979659404e-05, "loss": 0.7017, "step": 1062 }, { "epoch": 0.07483280535022879, "grad_norm": 2.338628053665161, "learning_rate": 1.3926523506637893e-05, "loss": 0.8007, "step": 1063 }, { "epoch": 0.07490320309750088, "grad_norm": 2.6149370670318604, "learning_rate": 1.3926292673500153e-05, "loss": 0.7721, "step": 1064 }, { "epoch": 0.07497360084477296, "grad_norm": 2.2722558975219727, "learning_rate": 1.3926061480258183e-05, "loss": 0.8677, "step": 1065 }, { "epoch": 0.07504399859204505, "grad_norm": 2.337745428085327, "learning_rate": 1.3925829926924005e-05, "loss": 0.7921, "step": 1066 }, { "epoch": 0.07511439633931714, "grad_norm": 2.441837787628174, "learning_rate": 1.3925598013509656e-05, "loss": 0.7871, "step": 1067 }, { "epoch": 0.07518479408658924, "grad_norm": 2.2845687866210938, "learning_rate": 1.3925365740027192e-05, "loss": 0.7445, "step": 1068 }, { "epoch": 0.07525519183386131, "grad_norm": 2.469635009765625, "learning_rate": 1.3925133106488694e-05, "loss": 0.8387, "step": 1069 }, { "epoch": 0.0753255895811334, "grad_norm": 2.2762258052825928, "learning_rate": 1.3924900112906252e-05, "loss": 0.8014, "step": 1070 }, { "epoch": 0.0753959873284055, "grad_norm": 2.3023054599761963, "learning_rate": 1.3924666759291983e-05, "loss": 0.8065, "step": 1071 }, { "epoch": 0.07546638507567757, "grad_norm": 2.5582659244537354, "learning_rate": 1.3924433045658017e-05, "loss": 0.8776, "step": 1072 }, { "epoch": 0.07553678282294966, "grad_norm": 2.3534300327301025, "learning_rate": 1.3924198972016508e-05, "loss": 0.7587, "step": 1073 }, { "epoch": 0.07560718057022175, "grad_norm": 2.9335005283355713, "learning_rate": 1.392396453837962e-05, "loss": 0.8025, "step": 1074 }, { "epoch": 0.07567757831749385, "grad_norm": 2.695983409881592, "learning_rate": 1.3923729744759548e-05, "loss": 0.7857, "step": 1075 }, { "epoch": 0.07574797606476592, "grad_norm": 2.541840076446533, "learning_rate": 1.3923494591168495e-05, "loss": 0.8693, "step": 1076 }, { "epoch": 0.07581837381203801, "grad_norm": 4.434484004974365, "learning_rate": 1.3923259077618688e-05, "loss": 0.8389, "step": 1077 }, { "epoch": 0.0758887715593101, "grad_norm": 3.152869462966919, "learning_rate": 1.392302320412237e-05, "loss": 0.7761, "step": 1078 }, { "epoch": 0.07595916930658218, "grad_norm": 2.712646245956421, "learning_rate": 1.3922786970691809e-05, "loss": 0.7359, "step": 1079 }, { "epoch": 0.07602956705385427, "grad_norm": 3.388559341430664, "learning_rate": 1.3922550377339281e-05, "loss": 0.9247, "step": 1080 }, { "epoch": 0.07609996480112637, "grad_norm": 2.246995687484741, "learning_rate": 1.392231342407709e-05, "loss": 0.7172, "step": 1081 }, { "epoch": 0.07617036254839846, "grad_norm": 2.3917324542999268, "learning_rate": 1.3922076110917556e-05, "loss": 0.8817, "step": 1082 }, { "epoch": 0.07624076029567053, "grad_norm": 2.307525873184204, "learning_rate": 1.3921838437873013e-05, "loss": 0.7499, "step": 1083 }, { "epoch": 0.07631115804294263, "grad_norm": 2.585801362991333, "learning_rate": 1.3921600404955824e-05, "loss": 0.7765, "step": 1084 }, { "epoch": 0.07638155579021472, "grad_norm": 2.4935460090637207, "learning_rate": 1.392136201217836e-05, "loss": 0.8588, "step": 1085 }, { "epoch": 0.0764519535374868, "grad_norm": 2.640094518661499, "learning_rate": 1.3921123259553017e-05, "loss": 0.8392, "step": 1086 }, { "epoch": 0.07652235128475889, "grad_norm": 2.3773410320281982, "learning_rate": 1.3920884147092208e-05, "loss": 0.7789, "step": 1087 }, { "epoch": 0.07659274903203098, "grad_norm": 2.4625816345214844, "learning_rate": 1.3920644674808364e-05, "loss": 0.8092, "step": 1088 }, { "epoch": 0.07666314677930307, "grad_norm": 2.482403039932251, "learning_rate": 1.3920404842713935e-05, "loss": 0.8263, "step": 1089 }, { "epoch": 0.07673354452657515, "grad_norm": 2.3082869052886963, "learning_rate": 1.3920164650821391e-05, "loss": 0.7719, "step": 1090 }, { "epoch": 0.07680394227384724, "grad_norm": 2.3619649410247803, "learning_rate": 1.3919924099143219e-05, "loss": 0.766, "step": 1091 }, { "epoch": 0.07687434002111933, "grad_norm": 5.365654945373535, "learning_rate": 1.3919683187691927e-05, "loss": 0.7163, "step": 1092 }, { "epoch": 0.0769447377683914, "grad_norm": 2.377168893814087, "learning_rate": 1.391944191648004e-05, "loss": 0.875, "step": 1093 }, { "epoch": 0.0770151355156635, "grad_norm": 2.7836620807647705, "learning_rate": 1.3919200285520099e-05, "loss": 0.7892, "step": 1094 }, { "epoch": 0.07708553326293559, "grad_norm": 2.541724681854248, "learning_rate": 1.3918958294824671e-05, "loss": 0.8978, "step": 1095 }, { "epoch": 0.07715593101020768, "grad_norm": 2.6473701000213623, "learning_rate": 1.3918715944406332e-05, "loss": 0.8525, "step": 1096 }, { "epoch": 0.07722632875747976, "grad_norm": 2.8525781631469727, "learning_rate": 1.3918473234277689e-05, "loss": 0.9435, "step": 1097 }, { "epoch": 0.07729672650475185, "grad_norm": 2.5585198402404785, "learning_rate": 1.3918230164451354e-05, "loss": 0.6999, "step": 1098 }, { "epoch": 0.07736712425202394, "grad_norm": 3.036473035812378, "learning_rate": 1.3917986734939968e-05, "loss": 0.8294, "step": 1099 }, { "epoch": 0.07743752199929602, "grad_norm": 2.722968578338623, "learning_rate": 1.3917742945756186e-05, "loss": 0.813, "step": 1100 }, { "epoch": 0.07750791974656811, "grad_norm": 2.578220844268799, "learning_rate": 1.3917498796912684e-05, "loss": 0.7201, "step": 1101 }, { "epoch": 0.0775783174938402, "grad_norm": 2.4114596843719482, "learning_rate": 1.3917254288422154e-05, "loss": 0.7861, "step": 1102 }, { "epoch": 0.07764871524111229, "grad_norm": 2.184274673461914, "learning_rate": 1.391700942029731e-05, "loss": 0.8199, "step": 1103 }, { "epoch": 0.07771911298838437, "grad_norm": 2.7250559329986572, "learning_rate": 1.391676419255088e-05, "loss": 0.7585, "step": 1104 }, { "epoch": 0.07778951073565646, "grad_norm": 2.0673868656158447, "learning_rate": 1.3916518605195617e-05, "loss": 0.6973, "step": 1105 }, { "epoch": 0.07785990848292855, "grad_norm": 2.280029535293579, "learning_rate": 1.3916272658244288e-05, "loss": 0.876, "step": 1106 }, { "epoch": 0.07793030623020063, "grad_norm": 2.3047807216644287, "learning_rate": 1.3916026351709677e-05, "loss": 0.793, "step": 1107 }, { "epoch": 0.07800070397747272, "grad_norm": 2.5945513248443604, "learning_rate": 1.3915779685604596e-05, "loss": 0.8831, "step": 1108 }, { "epoch": 0.07807110172474481, "grad_norm": 2.213367223739624, "learning_rate": 1.3915532659941863e-05, "loss": 0.7468, "step": 1109 }, { "epoch": 0.0781414994720169, "grad_norm": 2.7465169429779053, "learning_rate": 1.3915285274734326e-05, "loss": 0.9056, "step": 1110 }, { "epoch": 0.07821189721928898, "grad_norm": 3.006166696548462, "learning_rate": 1.3915037529994845e-05, "loss": 0.7478, "step": 1111 }, { "epoch": 0.07828229496656107, "grad_norm": 2.5754997730255127, "learning_rate": 1.3914789425736299e-05, "loss": 0.8475, "step": 1112 }, { "epoch": 0.07835269271383316, "grad_norm": 2.2889387607574463, "learning_rate": 1.3914540961971588e-05, "loss": 0.8606, "step": 1113 }, { "epoch": 0.07842309046110524, "grad_norm": 2.606018304824829, "learning_rate": 1.391429213871363e-05, "loss": 0.7655, "step": 1114 }, { "epoch": 0.07849348820837733, "grad_norm": 2.458618402481079, "learning_rate": 1.3914042955975363e-05, "loss": 0.828, "step": 1115 }, { "epoch": 0.07856388595564942, "grad_norm": 2.7074198722839355, "learning_rate": 1.391379341376974e-05, "loss": 0.7365, "step": 1116 }, { "epoch": 0.07863428370292151, "grad_norm": 2.198666572570801, "learning_rate": 1.3913543512109736e-05, "loss": 0.799, "step": 1117 }, { "epoch": 0.07870468145019359, "grad_norm": 3.0352747440338135, "learning_rate": 1.3913293251008345e-05, "loss": 0.7889, "step": 1118 }, { "epoch": 0.07877507919746568, "grad_norm": 2.559067487716675, "learning_rate": 1.3913042630478575e-05, "loss": 0.8505, "step": 1119 }, { "epoch": 0.07884547694473777, "grad_norm": 2.5808472633361816, "learning_rate": 1.3912791650533457e-05, "loss": 0.9135, "step": 1120 }, { "epoch": 0.07891587469200985, "grad_norm": 2.842872142791748, "learning_rate": 1.3912540311186044e-05, "loss": 0.8751, "step": 1121 }, { "epoch": 0.07898627243928194, "grad_norm": 2.315624475479126, "learning_rate": 1.3912288612449396e-05, "loss": 0.7087, "step": 1122 }, { "epoch": 0.07905667018655403, "grad_norm": 4.027956962585449, "learning_rate": 1.3912036554336606e-05, "loss": 0.8411, "step": 1123 }, { "epoch": 0.07912706793382612, "grad_norm": 2.1606645584106445, "learning_rate": 1.3911784136860776e-05, "loss": 0.7693, "step": 1124 }, { "epoch": 0.0791974656810982, "grad_norm": 2.377943515777588, "learning_rate": 1.3911531360035027e-05, "loss": 0.8472, "step": 1125 }, { "epoch": 0.07926786342837029, "grad_norm": 2.6446452140808105, "learning_rate": 1.3911278223872502e-05, "loss": 0.851, "step": 1126 }, { "epoch": 0.07933826117564238, "grad_norm": 2.9320027828216553, "learning_rate": 1.3911024728386364e-05, "loss": 0.8202, "step": 1127 }, { "epoch": 0.07940865892291446, "grad_norm": 2.632298469543457, "learning_rate": 1.3910770873589794e-05, "loss": 0.8758, "step": 1128 }, { "epoch": 0.07947905667018655, "grad_norm": 2.4498450756073, "learning_rate": 1.3910516659495986e-05, "loss": 0.7561, "step": 1129 }, { "epoch": 0.07954945441745864, "grad_norm": 2.2349772453308105, "learning_rate": 1.3910262086118157e-05, "loss": 0.7609, "step": 1130 }, { "epoch": 0.07961985216473073, "grad_norm": 2.0929770469665527, "learning_rate": 1.3910007153469544e-05, "loss": 0.7206, "step": 1131 }, { "epoch": 0.07969024991200281, "grad_norm": 2.680788993835449, "learning_rate": 1.3909751861563402e-05, "loss": 0.6386, "step": 1132 }, { "epoch": 0.0797606476592749, "grad_norm": 2.9313316345214844, "learning_rate": 1.3909496210413001e-05, "loss": 0.789, "step": 1133 }, { "epoch": 0.079831045406547, "grad_norm": 2.3499743938446045, "learning_rate": 1.3909240200031637e-05, "loss": 0.9106, "step": 1134 }, { "epoch": 0.07990144315381907, "grad_norm": 2.4001412391662598, "learning_rate": 1.3908983830432618e-05, "loss": 0.8043, "step": 1135 }, { "epoch": 0.07997184090109116, "grad_norm": 2.4667415618896484, "learning_rate": 1.390872710162927e-05, "loss": 0.838, "step": 1136 }, { "epoch": 0.08004223864836325, "grad_norm": 3.071204423904419, "learning_rate": 1.3908470013634942e-05, "loss": 0.7939, "step": 1137 }, { "epoch": 0.08011263639563534, "grad_norm": 4.286511421203613, "learning_rate": 1.3908212566463004e-05, "loss": 0.938, "step": 1138 }, { "epoch": 0.08018303414290742, "grad_norm": 2.4984350204467773, "learning_rate": 1.3907954760126839e-05, "loss": 0.7742, "step": 1139 }, { "epoch": 0.08025343189017951, "grad_norm": 2.583301544189453, "learning_rate": 1.3907696594639846e-05, "loss": 0.8492, "step": 1140 }, { "epoch": 0.0803238296374516, "grad_norm": 2.0510408878326416, "learning_rate": 1.3907438070015454e-05, "loss": 0.7681, "step": 1141 }, { "epoch": 0.08039422738472368, "grad_norm": 2.0713818073272705, "learning_rate": 1.3907179186267103e-05, "loss": 0.8141, "step": 1142 }, { "epoch": 0.08046462513199577, "grad_norm": 1.994084119796753, "learning_rate": 1.3906919943408247e-05, "loss": 0.7366, "step": 1143 }, { "epoch": 0.08053502287926786, "grad_norm": 2.5985875129699707, "learning_rate": 1.390666034145237e-05, "loss": 0.7806, "step": 1144 }, { "epoch": 0.08060542062653996, "grad_norm": 2.1617910861968994, "learning_rate": 1.3906400380412965e-05, "loss": 0.9237, "step": 1145 }, { "epoch": 0.08067581837381203, "grad_norm": 2.5079469680786133, "learning_rate": 1.390614006030355e-05, "loss": 0.6956, "step": 1146 }, { "epoch": 0.08074621612108412, "grad_norm": 2.830551862716675, "learning_rate": 1.390587938113766e-05, "loss": 0.8772, "step": 1147 }, { "epoch": 0.08081661386835622, "grad_norm": 3.30703067779541, "learning_rate": 1.3905618342928843e-05, "loss": 0.756, "step": 1148 }, { "epoch": 0.0808870116156283, "grad_norm": 2.7300477027893066, "learning_rate": 1.3905356945690678e-05, "loss": 0.7942, "step": 1149 }, { "epoch": 0.08095740936290038, "grad_norm": 2.952061891555786, "learning_rate": 1.3905095189436748e-05, "loss": 0.82, "step": 1150 }, { "epoch": 0.08102780711017248, "grad_norm": 2.432748794555664, "learning_rate": 1.390483307418067e-05, "loss": 0.7844, "step": 1151 }, { "epoch": 0.08109820485744457, "grad_norm": 2.337480306625366, "learning_rate": 1.3904570599936064e-05, "loss": 0.8442, "step": 1152 }, { "epoch": 0.08116860260471664, "grad_norm": 2.1601502895355225, "learning_rate": 1.390430776671658e-05, "loss": 0.798, "step": 1153 }, { "epoch": 0.08123900035198874, "grad_norm": 2.622744083404541, "learning_rate": 1.3904044574535883e-05, "loss": 0.7757, "step": 1154 }, { "epoch": 0.08130939809926083, "grad_norm": 2.1422293186187744, "learning_rate": 1.3903781023407656e-05, "loss": 0.5373, "step": 1155 }, { "epoch": 0.0813797958465329, "grad_norm": 1.9106401205062866, "learning_rate": 1.3903517113345598e-05, "loss": 0.7232, "step": 1156 }, { "epoch": 0.081450193593805, "grad_norm": 2.0706582069396973, "learning_rate": 1.3903252844363438e-05, "loss": 0.9279, "step": 1157 }, { "epoch": 0.08152059134107709, "grad_norm": 2.421539068222046, "learning_rate": 1.3902988216474909e-05, "loss": 0.7756, "step": 1158 }, { "epoch": 0.08159098908834918, "grad_norm": 2.4386990070343018, "learning_rate": 1.3902723229693771e-05, "loss": 0.7715, "step": 1159 }, { "epoch": 0.08166138683562126, "grad_norm": 2.156053066253662, "learning_rate": 1.39024578840338e-05, "loss": 0.6423, "step": 1160 }, { "epoch": 0.08173178458289335, "grad_norm": 2.495974540710449, "learning_rate": 1.390219217950879e-05, "loss": 0.9415, "step": 1161 }, { "epoch": 0.08180218233016544, "grad_norm": 2.8849143981933594, "learning_rate": 1.3901926116132562e-05, "loss": 0.8068, "step": 1162 }, { "epoch": 0.08187258007743752, "grad_norm": 2.169585943222046, "learning_rate": 1.390165969391894e-05, "loss": 0.7219, "step": 1163 }, { "epoch": 0.0819429778247096, "grad_norm": 2.262244701385498, "learning_rate": 1.3901392912881783e-05, "loss": 0.7655, "step": 1164 }, { "epoch": 0.0820133755719817, "grad_norm": 2.673832893371582, "learning_rate": 1.3901125773034958e-05, "loss": 0.7721, "step": 1165 }, { "epoch": 0.08208377331925379, "grad_norm": 2.455653190612793, "learning_rate": 1.3900858274392355e-05, "loss": 0.8008, "step": 1166 }, { "epoch": 0.08215417106652587, "grad_norm": 2.270050048828125, "learning_rate": 1.390059041696788e-05, "loss": 0.8389, "step": 1167 }, { "epoch": 0.08222456881379796, "grad_norm": 1.9465999603271484, "learning_rate": 1.3900322200775458e-05, "loss": 0.7993, "step": 1168 }, { "epoch": 0.08229496656107005, "grad_norm": 3.4678690433502197, "learning_rate": 1.3900053625829035e-05, "loss": 0.833, "step": 1169 }, { "epoch": 0.08236536430834214, "grad_norm": 2.734975814819336, "learning_rate": 1.3899784692142576e-05, "loss": 0.7439, "step": 1170 }, { "epoch": 0.08243576205561422, "grad_norm": 2.213425397872925, "learning_rate": 1.3899515399730065e-05, "loss": 0.7798, "step": 1171 }, { "epoch": 0.08250615980288631, "grad_norm": 2.859841823577881, "learning_rate": 1.3899245748605495e-05, "loss": 0.8909, "step": 1172 }, { "epoch": 0.0825765575501584, "grad_norm": 2.0889432430267334, "learning_rate": 1.3898975738782894e-05, "loss": 0.8147, "step": 1173 }, { "epoch": 0.08264695529743048, "grad_norm": 2.589876413345337, "learning_rate": 1.3898705370276294e-05, "loss": 0.7111, "step": 1174 }, { "epoch": 0.08271735304470257, "grad_norm": 2.3769683837890625, "learning_rate": 1.3898434643099754e-05, "loss": 0.681, "step": 1175 }, { "epoch": 0.08278775079197466, "grad_norm": 2.908860683441162, "learning_rate": 1.3898163557267349e-05, "loss": 0.7034, "step": 1176 }, { "epoch": 0.08285814853924675, "grad_norm": 2.564044237136841, "learning_rate": 1.3897892112793175e-05, "loss": 0.8255, "step": 1177 }, { "epoch": 0.08292854628651883, "grad_norm": 2.162842035293579, "learning_rate": 1.3897620309691343e-05, "loss": 0.706, "step": 1178 }, { "epoch": 0.08299894403379092, "grad_norm": 2.9402880668640137, "learning_rate": 1.3897348147975985e-05, "loss": 0.8585, "step": 1179 }, { "epoch": 0.08306934178106301, "grad_norm": 2.2693889141082764, "learning_rate": 1.3897075627661247e-05, "loss": 0.7962, "step": 1180 }, { "epoch": 0.08313973952833509, "grad_norm": 3.39579176902771, "learning_rate": 1.3896802748761302e-05, "loss": 0.8012, "step": 1181 }, { "epoch": 0.08321013727560718, "grad_norm": 2.2999424934387207, "learning_rate": 1.3896529511290336e-05, "loss": 0.8648, "step": 1182 }, { "epoch": 0.08328053502287927, "grad_norm": 2.5290372371673584, "learning_rate": 1.3896255915262555e-05, "loss": 0.681, "step": 1183 }, { "epoch": 0.08335093277015136, "grad_norm": 2.6502726078033447, "learning_rate": 1.3895981960692185e-05, "loss": 0.8042, "step": 1184 }, { "epoch": 0.08342133051742344, "grad_norm": 2.231309413909912, "learning_rate": 1.3895707647593465e-05, "loss": 0.8956, "step": 1185 }, { "epoch": 0.08349172826469553, "grad_norm": 2.863345146179199, "learning_rate": 1.389543297598066e-05, "loss": 0.885, "step": 1186 }, { "epoch": 0.08356212601196762, "grad_norm": 3.0012638568878174, "learning_rate": 1.3895157945868051e-05, "loss": 0.8302, "step": 1187 }, { "epoch": 0.0836325237592397, "grad_norm": 2.793842077255249, "learning_rate": 1.3894882557269934e-05, "loss": 0.9294, "step": 1188 }, { "epoch": 0.08370292150651179, "grad_norm": 2.2975356578826904, "learning_rate": 1.3894606810200628e-05, "loss": 0.7988, "step": 1189 }, { "epoch": 0.08377331925378388, "grad_norm": 2.162510871887207, "learning_rate": 1.3894330704674472e-05, "loss": 0.7758, "step": 1190 }, { "epoch": 0.08384371700105597, "grad_norm": 2.535217046737671, "learning_rate": 1.3894054240705819e-05, "loss": 0.7795, "step": 1191 }, { "epoch": 0.08391411474832805, "grad_norm": 2.562312126159668, "learning_rate": 1.389377741830904e-05, "loss": 0.7146, "step": 1192 }, { "epoch": 0.08398451249560014, "grad_norm": 2.719789505004883, "learning_rate": 1.3893500237498531e-05, "loss": 0.8078, "step": 1193 }, { "epoch": 0.08405491024287223, "grad_norm": 2.3458685874938965, "learning_rate": 1.3893222698288703e-05, "loss": 0.8022, "step": 1194 }, { "epoch": 0.08412530799014431, "grad_norm": 3.404025077819824, "learning_rate": 1.3892944800693981e-05, "loss": 0.6935, "step": 1195 }, { "epoch": 0.0841957057374164, "grad_norm": 2.1639485359191895, "learning_rate": 1.389266654472882e-05, "loss": 0.7435, "step": 1196 }, { "epoch": 0.08426610348468849, "grad_norm": 2.474079132080078, "learning_rate": 1.389238793040768e-05, "loss": 0.8526, "step": 1197 }, { "epoch": 0.08433650123196058, "grad_norm": 2.1720364093780518, "learning_rate": 1.389210895774505e-05, "loss": 0.7507, "step": 1198 }, { "epoch": 0.08440689897923266, "grad_norm": 2.9660544395446777, "learning_rate": 1.3891829626755432e-05, "loss": 0.8288, "step": 1199 }, { "epoch": 0.08447729672650475, "grad_norm": 2.234060049057007, "learning_rate": 1.3891549937453353e-05, "loss": 0.8511, "step": 1200 }, { "epoch": 0.08454769447377684, "grad_norm": 2.2961580753326416, "learning_rate": 1.389126988985335e-05, "loss": 0.7524, "step": 1201 }, { "epoch": 0.08461809222104892, "grad_norm": 2.5138049125671387, "learning_rate": 1.3890989483969984e-05, "loss": 0.8859, "step": 1202 }, { "epoch": 0.08468848996832101, "grad_norm": 2.0798654556274414, "learning_rate": 1.3890708719817834e-05, "loss": 0.7693, "step": 1203 }, { "epoch": 0.0847588877155931, "grad_norm": 3.122779607772827, "learning_rate": 1.3890427597411498e-05, "loss": 0.8716, "step": 1204 }, { "epoch": 0.0848292854628652, "grad_norm": 2.847696542739868, "learning_rate": 1.3890146116765588e-05, "loss": 0.8266, "step": 1205 }, { "epoch": 0.08489968321013727, "grad_norm": 2.395873785018921, "learning_rate": 1.3889864277894744e-05, "loss": 0.911, "step": 1206 }, { "epoch": 0.08497008095740936, "grad_norm": 2.2786810398101807, "learning_rate": 1.3889582080813615e-05, "loss": 0.8183, "step": 1207 }, { "epoch": 0.08504047870468145, "grad_norm": 2.9216792583465576, "learning_rate": 1.3889299525536876e-05, "loss": 0.8303, "step": 1208 }, { "epoch": 0.08511087645195353, "grad_norm": 2.7165164947509766, "learning_rate": 1.3889016612079214e-05, "loss": 0.8358, "step": 1209 }, { "epoch": 0.08518127419922562, "grad_norm": 2.5285568237304688, "learning_rate": 1.388873334045534e-05, "loss": 0.7461, "step": 1210 }, { "epoch": 0.08525167194649771, "grad_norm": 2.4055800437927246, "learning_rate": 1.3888449710679979e-05, "loss": 0.8048, "step": 1211 }, { "epoch": 0.0853220696937698, "grad_norm": 2.3476266860961914, "learning_rate": 1.388816572276788e-05, "loss": 0.7908, "step": 1212 }, { "epoch": 0.08539246744104188, "grad_norm": 2.1052346229553223, "learning_rate": 1.3887881376733808e-05, "loss": 0.7113, "step": 1213 }, { "epoch": 0.08546286518831397, "grad_norm": 2.121363401412964, "learning_rate": 1.3887596672592544e-05, "loss": 0.9373, "step": 1214 }, { "epoch": 0.08553326293558607, "grad_norm": 2.3110511302948, "learning_rate": 1.3887311610358892e-05, "loss": 0.8173, "step": 1215 }, { "epoch": 0.08560366068285814, "grad_norm": 2.68574857711792, "learning_rate": 1.3887026190047672e-05, "loss": 0.8506, "step": 1216 }, { "epoch": 0.08567405843013023, "grad_norm": 2.5414986610412598, "learning_rate": 1.3886740411673721e-05, "loss": 0.8525, "step": 1217 }, { "epoch": 0.08574445617740233, "grad_norm": 2.2566280364990234, "learning_rate": 1.38864542752519e-05, "loss": 0.8529, "step": 1218 }, { "epoch": 0.08581485392467442, "grad_norm": 2.4427168369293213, "learning_rate": 1.3886167780797087e-05, "loss": 0.7075, "step": 1219 }, { "epoch": 0.0858852516719465, "grad_norm": 3.031726360321045, "learning_rate": 1.388588092832417e-05, "loss": 0.7759, "step": 1220 }, { "epoch": 0.08595564941921859, "grad_norm": 4.495028018951416, "learning_rate": 1.3885593717848072e-05, "loss": 0.7418, "step": 1221 }, { "epoch": 0.08602604716649068, "grad_norm": 2.4878880977630615, "learning_rate": 1.3885306149383719e-05, "loss": 0.6506, "step": 1222 }, { "epoch": 0.08609644491376275, "grad_norm": 2.5579073429107666, "learning_rate": 1.388501822294606e-05, "loss": 0.8879, "step": 1223 }, { "epoch": 0.08616684266103485, "grad_norm": 2.3285892009735107, "learning_rate": 1.3884729938550072e-05, "loss": 0.6988, "step": 1224 }, { "epoch": 0.08623724040830694, "grad_norm": 7.200527191162109, "learning_rate": 1.3884441296210738e-05, "loss": 0.7056, "step": 1225 }, { "epoch": 0.08630763815557903, "grad_norm": 3.112534284591675, "learning_rate": 1.3884152295943068e-05, "loss": 0.6785, "step": 1226 }, { "epoch": 0.0863780359028511, "grad_norm": 2.8253750801086426, "learning_rate": 1.3883862937762082e-05, "loss": 0.9163, "step": 1227 }, { "epoch": 0.0864484336501232, "grad_norm": 3.5740163326263428, "learning_rate": 1.3883573221682832e-05, "loss": 0.856, "step": 1228 }, { "epoch": 0.08651883139739529, "grad_norm": 2.1805641651153564, "learning_rate": 1.3883283147720374e-05, "loss": 0.8387, "step": 1229 }, { "epoch": 0.08658922914466737, "grad_norm": 2.7790706157684326, "learning_rate": 1.388299271588979e-05, "loss": 0.8236, "step": 1230 }, { "epoch": 0.08665962689193946, "grad_norm": 2.7124087810516357, "learning_rate": 1.3882701926206182e-05, "loss": 0.6686, "step": 1231 }, { "epoch": 0.08673002463921155, "grad_norm": 2.5937278270721436, "learning_rate": 1.3882410778684666e-05, "loss": 0.6733, "step": 1232 }, { "epoch": 0.08680042238648364, "grad_norm": 2.4791929721832275, "learning_rate": 1.3882119273340384e-05, "loss": 0.7743, "step": 1233 }, { "epoch": 0.08687082013375572, "grad_norm": 2.413642644882202, "learning_rate": 1.3881827410188485e-05, "loss": 0.8598, "step": 1234 }, { "epoch": 0.08694121788102781, "grad_norm": 2.8802154064178467, "learning_rate": 1.3881535189244148e-05, "loss": 0.7586, "step": 1235 }, { "epoch": 0.0870116156282999, "grad_norm": 2.4883174896240234, "learning_rate": 1.3881242610522565e-05, "loss": 0.8073, "step": 1236 }, { "epoch": 0.08708201337557198, "grad_norm": 2.4571712017059326, "learning_rate": 1.3880949674038945e-05, "loss": 0.8557, "step": 1237 }, { "epoch": 0.08715241112284407, "grad_norm": 2.7038302421569824, "learning_rate": 1.388065637980852e-05, "loss": 0.8538, "step": 1238 }, { "epoch": 0.08722280887011616, "grad_norm": 4.072126388549805, "learning_rate": 1.3880362727846538e-05, "loss": 0.6583, "step": 1239 }, { "epoch": 0.08729320661738825, "grad_norm": 2.840019941329956, "learning_rate": 1.3880068718168267e-05, "loss": 0.752, "step": 1240 }, { "epoch": 0.08736360436466033, "grad_norm": 2.644291639328003, "learning_rate": 1.3879774350788994e-05, "loss": 0.827, "step": 1241 }, { "epoch": 0.08743400211193242, "grad_norm": 2.6969783306121826, "learning_rate": 1.3879479625724018e-05, "loss": 0.8139, "step": 1242 }, { "epoch": 0.08750439985920451, "grad_norm": 2.3683059215545654, "learning_rate": 1.387918454298867e-05, "loss": 0.7872, "step": 1243 }, { "epoch": 0.08757479760647659, "grad_norm": 2.622821092605591, "learning_rate": 1.3878889102598285e-05, "loss": 0.8955, "step": 1244 }, { "epoch": 0.08764519535374868, "grad_norm": 2.576616048812866, "learning_rate": 1.3878593304568225e-05, "loss": 0.8943, "step": 1245 }, { "epoch": 0.08771559310102077, "grad_norm": 2.552034616470337, "learning_rate": 1.3878297148913871e-05, "loss": 0.9009, "step": 1246 }, { "epoch": 0.08778599084829286, "grad_norm": 2.806102752685547, "learning_rate": 1.387800063565062e-05, "loss": 0.7493, "step": 1247 }, { "epoch": 0.08785638859556494, "grad_norm": 2.813443422317505, "learning_rate": 1.3877703764793886e-05, "loss": 0.8974, "step": 1248 }, { "epoch": 0.08792678634283703, "grad_norm": 2.6058084964752197, "learning_rate": 1.3877406536359102e-05, "loss": 0.9062, "step": 1249 }, { "epoch": 0.08799718409010912, "grad_norm": 2.30733585357666, "learning_rate": 1.3877108950361726e-05, "loss": 0.7061, "step": 1250 }, { "epoch": 0.0880675818373812, "grad_norm": 2.7863380908966064, "learning_rate": 1.3876811006817227e-05, "loss": 0.8113, "step": 1251 }, { "epoch": 0.08813797958465329, "grad_norm": 2.764026641845703, "learning_rate": 1.3876512705741095e-05, "loss": 0.8435, "step": 1252 }, { "epoch": 0.08820837733192538, "grad_norm": 2.821798324584961, "learning_rate": 1.3876214047148842e-05, "loss": 0.8674, "step": 1253 }, { "epoch": 0.08827877507919747, "grad_norm": 2.3681561946868896, "learning_rate": 1.387591503105599e-05, "loss": 0.7847, "step": 1254 }, { "epoch": 0.08834917282646955, "grad_norm": 2.340742349624634, "learning_rate": 1.387561565747809e-05, "loss": 0.7261, "step": 1255 }, { "epoch": 0.08841957057374164, "grad_norm": 2.657803773880005, "learning_rate": 1.3875315926430704e-05, "loss": 0.878, "step": 1256 }, { "epoch": 0.08848996832101373, "grad_norm": 2.602449893951416, "learning_rate": 1.3875015837929418e-05, "loss": 0.8129, "step": 1257 }, { "epoch": 0.08856036606828581, "grad_norm": 2.1301064491271973, "learning_rate": 1.3874715391989833e-05, "loss": 0.7257, "step": 1258 }, { "epoch": 0.0886307638155579, "grad_norm": 2.774780035018921, "learning_rate": 1.3874414588627566e-05, "loss": 0.7082, "step": 1259 }, { "epoch": 0.08870116156282999, "grad_norm": 2.597869396209717, "learning_rate": 1.387411342785826e-05, "loss": 0.7646, "step": 1260 }, { "epoch": 0.08877155931010208, "grad_norm": 2.3135950565338135, "learning_rate": 1.3873811909697572e-05, "loss": 0.914, "step": 1261 }, { "epoch": 0.08884195705737416, "grad_norm": 3.342412233352661, "learning_rate": 1.3873510034161175e-05, "loss": 0.8342, "step": 1262 }, { "epoch": 0.08891235480464625, "grad_norm": 2.6504201889038086, "learning_rate": 1.387320780126477e-05, "loss": 0.7169, "step": 1263 }, { "epoch": 0.08898275255191834, "grad_norm": 2.190500020980835, "learning_rate": 1.3872905211024065e-05, "loss": 0.8503, "step": 1264 }, { "epoch": 0.08905315029919042, "grad_norm": 2.3023252487182617, "learning_rate": 1.3872602263454793e-05, "loss": 0.8676, "step": 1265 }, { "epoch": 0.08912354804646251, "grad_norm": 2.9510107040405273, "learning_rate": 1.3872298958572704e-05, "loss": 0.8212, "step": 1266 }, { "epoch": 0.0891939457937346, "grad_norm": 2.6536686420440674, "learning_rate": 1.387199529639357e-05, "loss": 0.8426, "step": 1267 }, { "epoch": 0.0892643435410067, "grad_norm": 2.8836700916290283, "learning_rate": 1.3871691276933177e-05, "loss": 0.8276, "step": 1268 }, { "epoch": 0.08933474128827877, "grad_norm": 2.256439208984375, "learning_rate": 1.387138690020733e-05, "loss": 0.7493, "step": 1269 }, { "epoch": 0.08940513903555086, "grad_norm": 2.5551016330718994, "learning_rate": 1.3871082166231855e-05, "loss": 0.7489, "step": 1270 }, { "epoch": 0.08947553678282295, "grad_norm": 2.937544107437134, "learning_rate": 1.3870777075022597e-05, "loss": 0.7872, "step": 1271 }, { "epoch": 0.08954593453009503, "grad_norm": 2.5060267448425293, "learning_rate": 1.3870471626595416e-05, "loss": 0.7802, "step": 1272 }, { "epoch": 0.08961633227736712, "grad_norm": 2.519090175628662, "learning_rate": 1.3870165820966192e-05, "loss": 0.7205, "step": 1273 }, { "epoch": 0.08968673002463921, "grad_norm": 2.4442453384399414, "learning_rate": 1.3869859658150824e-05, "loss": 0.8573, "step": 1274 }, { "epoch": 0.0897571277719113, "grad_norm": 3.242283344268799, "learning_rate": 1.3869553138165233e-05, "loss": 0.7878, "step": 1275 }, { "epoch": 0.08982752551918338, "grad_norm": 2.114166259765625, "learning_rate": 1.3869246261025351e-05, "loss": 0.7206, "step": 1276 }, { "epoch": 0.08989792326645547, "grad_norm": 2.6408743858337402, "learning_rate": 1.3868939026747136e-05, "loss": 0.8894, "step": 1277 }, { "epoch": 0.08996832101372756, "grad_norm": 3.6767542362213135, "learning_rate": 1.3868631435346559e-05, "loss": 0.7697, "step": 1278 }, { "epoch": 0.09003871876099964, "grad_norm": 5.262668132781982, "learning_rate": 1.3868323486839615e-05, "loss": 0.8461, "step": 1279 }, { "epoch": 0.09010911650827173, "grad_norm": 2.3567543029785156, "learning_rate": 1.386801518124231e-05, "loss": 0.7171, "step": 1280 }, { "epoch": 0.09017951425554382, "grad_norm": 2.3477931022644043, "learning_rate": 1.3867706518570676e-05, "loss": 0.715, "step": 1281 }, { "epoch": 0.09024991200281592, "grad_norm": 2.534640073776245, "learning_rate": 1.3867397498840761e-05, "loss": 0.7449, "step": 1282 }, { "epoch": 0.090320309750088, "grad_norm": 2.312380790710449, "learning_rate": 1.3867088122068632e-05, "loss": 0.7646, "step": 1283 }, { "epoch": 0.09039070749736008, "grad_norm": 2.9777767658233643, "learning_rate": 1.3866778388270371e-05, "loss": 0.7833, "step": 1284 }, { "epoch": 0.09046110524463218, "grad_norm": 2.1043152809143066, "learning_rate": 1.3866468297462083e-05, "loss": 0.7406, "step": 1285 }, { "epoch": 0.09053150299190425, "grad_norm": 2.614577293395996, "learning_rate": 1.386615784965989e-05, "loss": 0.806, "step": 1286 }, { "epoch": 0.09060190073917634, "grad_norm": 2.661571741104126, "learning_rate": 1.3865847044879931e-05, "loss": 0.9052, "step": 1287 }, { "epoch": 0.09067229848644844, "grad_norm": 2.3111274242401123, "learning_rate": 1.3865535883138366e-05, "loss": 0.8733, "step": 1288 }, { "epoch": 0.09074269623372053, "grad_norm": 1.9842638969421387, "learning_rate": 1.3865224364451372e-05, "loss": 0.8459, "step": 1289 }, { "epoch": 0.0908130939809926, "grad_norm": 2.4144270420074463, "learning_rate": 1.3864912488835148e-05, "loss": 0.8015, "step": 1290 }, { "epoch": 0.0908834917282647, "grad_norm": 2.203235387802124, "learning_rate": 1.3864600256305906e-05, "loss": 0.8799, "step": 1291 }, { "epoch": 0.09095388947553679, "grad_norm": 2.2370188236236572, "learning_rate": 1.386428766687988e-05, "loss": 0.825, "step": 1292 }, { "epoch": 0.09102428722280886, "grad_norm": 2.5947988033294678, "learning_rate": 1.386397472057332e-05, "loss": 0.7476, "step": 1293 }, { "epoch": 0.09109468497008096, "grad_norm": 2.7820844650268555, "learning_rate": 1.3863661417402497e-05, "loss": 0.868, "step": 1294 }, { "epoch": 0.09116508271735305, "grad_norm": 2.434504747390747, "learning_rate": 1.3863347757383704e-05, "loss": 0.8229, "step": 1295 }, { "epoch": 0.09123548046462514, "grad_norm": 2.003082752227783, "learning_rate": 1.3863033740533243e-05, "loss": 0.8127, "step": 1296 }, { "epoch": 0.09130587821189722, "grad_norm": 2.8632137775421143, "learning_rate": 1.3862719366867443e-05, "loss": 0.815, "step": 1297 }, { "epoch": 0.0913762759591693, "grad_norm": 2.8333075046539307, "learning_rate": 1.3862404636402647e-05, "loss": 0.8728, "step": 1298 }, { "epoch": 0.0914466737064414, "grad_norm": 3.1794545650482178, "learning_rate": 1.386208954915522e-05, "loss": 0.8344, "step": 1299 }, { "epoch": 0.09151707145371347, "grad_norm": 2.5439274311065674, "learning_rate": 1.3861774105141543e-05, "loss": 0.8246, "step": 1300 }, { "epoch": 0.09158746920098557, "grad_norm": 3.2485299110412598, "learning_rate": 1.3861458304378014e-05, "loss": 0.8567, "step": 1301 }, { "epoch": 0.09165786694825766, "grad_norm": 2.0220799446105957, "learning_rate": 1.3861142146881056e-05, "loss": 0.6762, "step": 1302 }, { "epoch": 0.09172826469552975, "grad_norm": 2.6894168853759766, "learning_rate": 1.3860825632667102e-05, "loss": 0.7731, "step": 1303 }, { "epoch": 0.09179866244280183, "grad_norm": 2.2226099967956543, "learning_rate": 1.3860508761752612e-05, "loss": 0.806, "step": 1304 }, { "epoch": 0.09186906019007392, "grad_norm": 2.5051088333129883, "learning_rate": 1.3860191534154057e-05, "loss": 0.783, "step": 1305 }, { "epoch": 0.09193945793734601, "grad_norm": 2.3245034217834473, "learning_rate": 1.385987394988793e-05, "loss": 0.7509, "step": 1306 }, { "epoch": 0.09200985568461809, "grad_norm": 2.097900867462158, "learning_rate": 1.3859556008970746e-05, "loss": 0.8158, "step": 1307 }, { "epoch": 0.09208025343189018, "grad_norm": 2.423870325088501, "learning_rate": 1.3859237711419033e-05, "loss": 0.8729, "step": 1308 }, { "epoch": 0.09215065117916227, "grad_norm": 2.469750165939331, "learning_rate": 1.3858919057249338e-05, "loss": 0.6643, "step": 1309 }, { "epoch": 0.09222104892643436, "grad_norm": 2.652338743209839, "learning_rate": 1.3858600046478229e-05, "loss": 0.8066, "step": 1310 }, { "epoch": 0.09229144667370644, "grad_norm": 2.964411973953247, "learning_rate": 1.385828067912229e-05, "loss": 0.9045, "step": 1311 }, { "epoch": 0.09236184442097853, "grad_norm": 2.3329355716705322, "learning_rate": 1.385796095519813e-05, "loss": 0.803, "step": 1312 }, { "epoch": 0.09243224216825062, "grad_norm": 2.5097787380218506, "learning_rate": 1.385764087472237e-05, "loss": 0.7405, "step": 1313 }, { "epoch": 0.0925026399155227, "grad_norm": 2.2900328636169434, "learning_rate": 1.3857320437711648e-05, "loss": 0.7038, "step": 1314 }, { "epoch": 0.09257303766279479, "grad_norm": 2.4353079795837402, "learning_rate": 1.3856999644182628e-05, "loss": 0.7825, "step": 1315 }, { "epoch": 0.09264343541006688, "grad_norm": 1.9602270126342773, "learning_rate": 1.3856678494151985e-05, "loss": 0.8439, "step": 1316 }, { "epoch": 0.09271383315733897, "grad_norm": 2.6522114276885986, "learning_rate": 1.3856356987636416e-05, "loss": 0.7998, "step": 1317 }, { "epoch": 0.09278423090461105, "grad_norm": 2.130891799926758, "learning_rate": 1.3856035124652641e-05, "loss": 0.6967, "step": 1318 }, { "epoch": 0.09285462865188314, "grad_norm": 2.1575052738189697, "learning_rate": 1.3855712905217388e-05, "loss": 0.7556, "step": 1319 }, { "epoch": 0.09292502639915523, "grad_norm": 2.6170058250427246, "learning_rate": 1.3855390329347415e-05, "loss": 0.7389, "step": 1320 }, { "epoch": 0.09299542414642731, "grad_norm": 2.095670700073242, "learning_rate": 1.3855067397059488e-05, "loss": 0.7532, "step": 1321 }, { "epoch": 0.0930658218936994, "grad_norm": 2.4671149253845215, "learning_rate": 1.38547441083704e-05, "loss": 0.7577, "step": 1322 }, { "epoch": 0.09313621964097149, "grad_norm": 2.939295530319214, "learning_rate": 1.3854420463296956e-05, "loss": 0.8789, "step": 1323 }, { "epoch": 0.09320661738824358, "grad_norm": 2.3854010105133057, "learning_rate": 1.3854096461855986e-05, "loss": 0.8467, "step": 1324 }, { "epoch": 0.09327701513551566, "grad_norm": 2.4174675941467285, "learning_rate": 1.3853772104064333e-05, "loss": 0.8978, "step": 1325 }, { "epoch": 0.09334741288278775, "grad_norm": 2.2650229930877686, "learning_rate": 1.385344738993886e-05, "loss": 0.7247, "step": 1326 }, { "epoch": 0.09341781063005984, "grad_norm": 2.561947822570801, "learning_rate": 1.385312231949645e-05, "loss": 0.8488, "step": 1327 }, { "epoch": 0.09348820837733192, "grad_norm": 2.5747900009155273, "learning_rate": 1.3852796892754007e-05, "loss": 0.8617, "step": 1328 }, { "epoch": 0.09355860612460401, "grad_norm": 2.3968632221221924, "learning_rate": 1.3852471109728445e-05, "loss": 0.859, "step": 1329 }, { "epoch": 0.0936290038718761, "grad_norm": 2.723215341567993, "learning_rate": 1.3852144970436706e-05, "loss": 0.8457, "step": 1330 }, { "epoch": 0.09369940161914819, "grad_norm": 2.238443374633789, "learning_rate": 1.3851818474895742e-05, "loss": 0.8388, "step": 1331 }, { "epoch": 0.09376979936642027, "grad_norm": 2.9781036376953125, "learning_rate": 1.3851491623122532e-05, "loss": 0.7862, "step": 1332 }, { "epoch": 0.09384019711369236, "grad_norm": 2.152423858642578, "learning_rate": 1.3851164415134068e-05, "loss": 0.7846, "step": 1333 }, { "epoch": 0.09391059486096445, "grad_norm": 5.138025760650635, "learning_rate": 1.3850836850947357e-05, "loss": 0.6571, "step": 1334 }, { "epoch": 0.09398099260823653, "grad_norm": 2.105922222137451, "learning_rate": 1.3850508930579436e-05, "loss": 0.7917, "step": 1335 }, { "epoch": 0.09405139035550862, "grad_norm": 2.361989974975586, "learning_rate": 1.3850180654047353e-05, "loss": 0.8194, "step": 1336 }, { "epoch": 0.09412178810278071, "grad_norm": 2.111793279647827, "learning_rate": 1.3849852021368174e-05, "loss": 0.7655, "step": 1337 }, { "epoch": 0.0941921858500528, "grad_norm": 2.6305441856384277, "learning_rate": 1.3849523032558984e-05, "loss": 0.9304, "step": 1338 }, { "epoch": 0.09426258359732488, "grad_norm": 2.5034306049346924, "learning_rate": 1.3849193687636888e-05, "loss": 0.7168, "step": 1339 }, { "epoch": 0.09433298134459697, "grad_norm": 2.2680504322052, "learning_rate": 1.3848863986619008e-05, "loss": 0.7562, "step": 1340 }, { "epoch": 0.09440337909186906, "grad_norm": 2.807737350463867, "learning_rate": 1.3848533929522489e-05, "loss": 0.7224, "step": 1341 }, { "epoch": 0.09447377683914114, "grad_norm": 2.4741384983062744, "learning_rate": 1.3848203516364487e-05, "loss": 0.7647, "step": 1342 }, { "epoch": 0.09454417458641323, "grad_norm": 2.344810962677002, "learning_rate": 1.3847872747162183e-05, "loss": 0.7944, "step": 1343 }, { "epoch": 0.09461457233368532, "grad_norm": 2.4946224689483643, "learning_rate": 1.3847541621932774e-05, "loss": 0.7797, "step": 1344 }, { "epoch": 0.09468497008095741, "grad_norm": 2.278512477874756, "learning_rate": 1.3847210140693473e-05, "loss": 0.8775, "step": 1345 }, { "epoch": 0.09475536782822949, "grad_norm": 2.3273086547851562, "learning_rate": 1.3846878303461514e-05, "loss": 0.7515, "step": 1346 }, { "epoch": 0.09482576557550158, "grad_norm": 2.5070323944091797, "learning_rate": 1.3846546110254153e-05, "loss": 0.7492, "step": 1347 }, { "epoch": 0.09489616332277367, "grad_norm": 2.062927484512329, "learning_rate": 1.384621356108866e-05, "loss": 0.8259, "step": 1348 }, { "epoch": 0.09496656107004575, "grad_norm": 2.5209076404571533, "learning_rate": 1.3845880655982322e-05, "loss": 0.7886, "step": 1349 }, { "epoch": 0.09503695881731784, "grad_norm": 2.4856977462768555, "learning_rate": 1.3845547394952451e-05, "loss": 0.8099, "step": 1350 }, { "epoch": 0.09510735656458993, "grad_norm": 2.125852346420288, "learning_rate": 1.3845213778016367e-05, "loss": 0.7939, "step": 1351 }, { "epoch": 0.09517775431186203, "grad_norm": 2.2064595222473145, "learning_rate": 1.3844879805191423e-05, "loss": 0.7733, "step": 1352 }, { "epoch": 0.0952481520591341, "grad_norm": 2.861234188079834, "learning_rate": 1.3844545476494977e-05, "loss": 0.7291, "step": 1353 }, { "epoch": 0.0953185498064062, "grad_norm": 2.4675984382629395, "learning_rate": 1.3844210791944414e-05, "loss": 0.6809, "step": 1354 }, { "epoch": 0.09538894755367829, "grad_norm": 2.3248350620269775, "learning_rate": 1.3843875751557133e-05, "loss": 0.718, "step": 1355 }, { "epoch": 0.09545934530095036, "grad_norm": 2.4369735717773438, "learning_rate": 1.3843540355350554e-05, "loss": 0.7312, "step": 1356 }, { "epoch": 0.09552974304822245, "grad_norm": 2.5247199535369873, "learning_rate": 1.3843204603342115e-05, "loss": 0.7104, "step": 1357 }, { "epoch": 0.09560014079549455, "grad_norm": 4.219001293182373, "learning_rate": 1.3842868495549268e-05, "loss": 0.7432, "step": 1358 }, { "epoch": 0.09567053854276664, "grad_norm": 2.19252610206604, "learning_rate": 1.3842532031989493e-05, "loss": 0.8241, "step": 1359 }, { "epoch": 0.09574093629003871, "grad_norm": 4.109724044799805, "learning_rate": 1.384219521268028e-05, "loss": 0.8959, "step": 1360 }, { "epoch": 0.0958113340373108, "grad_norm": 3.523434638977051, "learning_rate": 1.384185803763914e-05, "loss": 0.7549, "step": 1361 }, { "epoch": 0.0958817317845829, "grad_norm": 2.586801052093506, "learning_rate": 1.3841520506883607e-05, "loss": 0.682, "step": 1362 }, { "epoch": 0.09595212953185497, "grad_norm": 3.636446237564087, "learning_rate": 1.3841182620431225e-05, "loss": 0.7714, "step": 1363 }, { "epoch": 0.09602252727912707, "grad_norm": 2.432814359664917, "learning_rate": 1.3840844378299565e-05, "loss": 0.7042, "step": 1364 }, { "epoch": 0.09609292502639916, "grad_norm": 2.7302401065826416, "learning_rate": 1.3840505780506206e-05, "loss": 0.716, "step": 1365 }, { "epoch": 0.09616332277367125, "grad_norm": 3.0137319564819336, "learning_rate": 1.3840166827068759e-05, "loss": 0.8235, "step": 1366 }, { "epoch": 0.09623372052094332, "grad_norm": 2.7418832778930664, "learning_rate": 1.3839827518004845e-05, "loss": 0.7196, "step": 1367 }, { "epoch": 0.09630411826821542, "grad_norm": 2.4431426525115967, "learning_rate": 1.38394878533321e-05, "loss": 0.7854, "step": 1368 }, { "epoch": 0.09637451601548751, "grad_norm": 2.243542432785034, "learning_rate": 1.3839147833068192e-05, "loss": 0.8738, "step": 1369 }, { "epoch": 0.09644491376275958, "grad_norm": 2.922335386276245, "learning_rate": 1.3838807457230792e-05, "loss": 0.8736, "step": 1370 }, { "epoch": 0.09651531151003168, "grad_norm": 2.514521360397339, "learning_rate": 1.3838466725837598e-05, "loss": 0.8892, "step": 1371 }, { "epoch": 0.09658570925730377, "grad_norm": 2.7302026748657227, "learning_rate": 1.3838125638906328e-05, "loss": 0.8144, "step": 1372 }, { "epoch": 0.09665610700457586, "grad_norm": 3.0278453826904297, "learning_rate": 1.383778419645471e-05, "loss": 0.7362, "step": 1373 }, { "epoch": 0.09672650475184794, "grad_norm": 2.437964916229248, "learning_rate": 1.38374423985005e-05, "loss": 0.9273, "step": 1374 }, { "epoch": 0.09679690249912003, "grad_norm": 2.7410168647766113, "learning_rate": 1.383710024506147e-05, "loss": 0.7983, "step": 1375 }, { "epoch": 0.09686730024639212, "grad_norm": 2.2270326614379883, "learning_rate": 1.3836757736155403e-05, "loss": 0.6236, "step": 1376 }, { "epoch": 0.0969376979936642, "grad_norm": 2.5478107929229736, "learning_rate": 1.3836414871800111e-05, "loss": 0.8237, "step": 1377 }, { "epoch": 0.09700809574093629, "grad_norm": 2.2321324348449707, "learning_rate": 1.3836071652013418e-05, "loss": 0.8182, "step": 1378 }, { "epoch": 0.09707849348820838, "grad_norm": 2.3184924125671387, "learning_rate": 1.3835728076813168e-05, "loss": 0.865, "step": 1379 }, { "epoch": 0.09714889123548047, "grad_norm": 2.6252634525299072, "learning_rate": 1.3835384146217225e-05, "loss": 0.8602, "step": 1380 }, { "epoch": 0.09721928898275255, "grad_norm": 2.3333702087402344, "learning_rate": 1.383503986024347e-05, "loss": 0.7972, "step": 1381 }, { "epoch": 0.09728968673002464, "grad_norm": 2.43939208984375, "learning_rate": 1.3834695218909803e-05, "loss": 0.8496, "step": 1382 }, { "epoch": 0.09736008447729673, "grad_norm": 2.213832378387451, "learning_rate": 1.3834350222234141e-05, "loss": 0.7533, "step": 1383 }, { "epoch": 0.09743048222456882, "grad_norm": 2.013314723968506, "learning_rate": 1.3834004870234422e-05, "loss": 0.7306, "step": 1384 }, { "epoch": 0.0975008799718409, "grad_norm": 2.263777732849121, "learning_rate": 1.3833659162928599e-05, "loss": 0.8346, "step": 1385 }, { "epoch": 0.09757127771911299, "grad_norm": 2.3456687927246094, "learning_rate": 1.383331310033465e-05, "loss": 0.9148, "step": 1386 }, { "epoch": 0.09764167546638508, "grad_norm": 2.3890085220336914, "learning_rate": 1.3832966682470563e-05, "loss": 0.7463, "step": 1387 }, { "epoch": 0.09771207321365716, "grad_norm": 3.2109227180480957, "learning_rate": 1.3832619909354347e-05, "loss": 0.7994, "step": 1388 }, { "epoch": 0.09778247096092925, "grad_norm": 2.240267515182495, "learning_rate": 1.3832272781004037e-05, "loss": 0.7914, "step": 1389 }, { "epoch": 0.09785286870820134, "grad_norm": 2.224663019180298, "learning_rate": 1.3831925297437678e-05, "loss": 0.7649, "step": 1390 }, { "epoch": 0.09792326645547343, "grad_norm": 2.4558374881744385, "learning_rate": 1.3831577458673334e-05, "loss": 0.7993, "step": 1391 }, { "epoch": 0.09799366420274551, "grad_norm": 2.7335100173950195, "learning_rate": 1.3831229264729092e-05, "loss": 0.8467, "step": 1392 }, { "epoch": 0.0980640619500176, "grad_norm": 2.355727434158325, "learning_rate": 1.3830880715623052e-05, "loss": 0.8278, "step": 1393 }, { "epoch": 0.09813445969728969, "grad_norm": 2.302319049835205, "learning_rate": 1.3830531811373339e-05, "loss": 0.8303, "step": 1394 }, { "epoch": 0.09820485744456177, "grad_norm": 2.192305564880371, "learning_rate": 1.3830182551998088e-05, "loss": 0.7256, "step": 1395 }, { "epoch": 0.09827525519183386, "grad_norm": 2.150691509246826, "learning_rate": 1.3829832937515463e-05, "loss": 0.7992, "step": 1396 }, { "epoch": 0.09834565293910595, "grad_norm": 2.6273810863494873, "learning_rate": 1.3829482967943637e-05, "loss": 0.8328, "step": 1397 }, { "epoch": 0.09841605068637804, "grad_norm": 2.681490421295166, "learning_rate": 1.3829132643300807e-05, "loss": 0.7747, "step": 1398 }, { "epoch": 0.09848644843365012, "grad_norm": 2.4854586124420166, "learning_rate": 1.3828781963605188e-05, "loss": 0.8758, "step": 1399 }, { "epoch": 0.09855684618092221, "grad_norm": 2.2144296169281006, "learning_rate": 1.3828430928875007e-05, "loss": 0.8129, "step": 1400 }, { "epoch": 0.0986272439281943, "grad_norm": 2.1386542320251465, "learning_rate": 1.3828079539128519e-05, "loss": 0.7519, "step": 1401 }, { "epoch": 0.09869764167546638, "grad_norm": 3.1108200550079346, "learning_rate": 1.3827727794383992e-05, "loss": 0.8049, "step": 1402 }, { "epoch": 0.09876803942273847, "grad_norm": 2.401463747024536, "learning_rate": 1.3827375694659713e-05, "loss": 0.9259, "step": 1403 }, { "epoch": 0.09883843717001056, "grad_norm": 2.5965259075164795, "learning_rate": 1.382702323997399e-05, "loss": 0.7451, "step": 1404 }, { "epoch": 0.09890883491728265, "grad_norm": 2.4389536380767822, "learning_rate": 1.3826670430345144e-05, "loss": 0.7561, "step": 1405 }, { "epoch": 0.09897923266455473, "grad_norm": 2.2094004154205322, "learning_rate": 1.3826317265791519e-05, "loss": 0.8405, "step": 1406 }, { "epoch": 0.09904963041182682, "grad_norm": 2.685638189315796, "learning_rate": 1.382596374633148e-05, "loss": 0.8458, "step": 1407 }, { "epoch": 0.09912002815909891, "grad_norm": 2.936405658721924, "learning_rate": 1.3825609871983402e-05, "loss": 0.7925, "step": 1408 }, { "epoch": 0.09919042590637099, "grad_norm": 3.0183722972869873, "learning_rate": 1.3825255642765685e-05, "loss": 0.7976, "step": 1409 }, { "epoch": 0.09926082365364308, "grad_norm": 2.388864040374756, "learning_rate": 1.3824901058696747e-05, "loss": 0.6759, "step": 1410 }, { "epoch": 0.09933122140091517, "grad_norm": 2.0609161853790283, "learning_rate": 1.382454611979502e-05, "loss": 0.6893, "step": 1411 }, { "epoch": 0.09940161914818726, "grad_norm": 2.0833375453948975, "learning_rate": 1.3824190826078961e-05, "loss": 0.8141, "step": 1412 }, { "epoch": 0.09947201689545934, "grad_norm": 2.0329315662384033, "learning_rate": 1.382383517756704e-05, "loss": 0.6997, "step": 1413 }, { "epoch": 0.09954241464273143, "grad_norm": 2.3665215969085693, "learning_rate": 1.3823479174277747e-05, "loss": 0.8026, "step": 1414 }, { "epoch": 0.09961281239000352, "grad_norm": 2.459826946258545, "learning_rate": 1.3823122816229595e-05, "loss": 0.6956, "step": 1415 }, { "epoch": 0.0996832101372756, "grad_norm": 2.236638069152832, "learning_rate": 1.3822766103441106e-05, "loss": 0.8378, "step": 1416 }, { "epoch": 0.0997536078845477, "grad_norm": 2.592947244644165, "learning_rate": 1.382240903593083e-05, "loss": 0.7812, "step": 1417 }, { "epoch": 0.09982400563181978, "grad_norm": 2.330080986022949, "learning_rate": 1.382205161371733e-05, "loss": 0.8787, "step": 1418 }, { "epoch": 0.09989440337909188, "grad_norm": 2.26477313041687, "learning_rate": 1.3821693836819186e-05, "loss": 0.9215, "step": 1419 }, { "epoch": 0.09996480112636395, "grad_norm": 2.4268507957458496, "learning_rate": 1.3821335705255002e-05, "loss": 0.827, "step": 1420 }, { "epoch": 0.10003519887363604, "grad_norm": 2.1846697330474854, "learning_rate": 1.3820977219043397e-05, "loss": 0.7865, "step": 1421 }, { "epoch": 0.10010559662090814, "grad_norm": 3.0725224018096924, "learning_rate": 1.382061837820301e-05, "loss": 0.8126, "step": 1422 }, { "epoch": 0.10017599436818021, "grad_norm": 2.04606294631958, "learning_rate": 1.3820259182752493e-05, "loss": 0.8874, "step": 1423 }, { "epoch": 0.1002463921154523, "grad_norm": 2.1336867809295654, "learning_rate": 1.3819899632710528e-05, "loss": 0.6933, "step": 1424 }, { "epoch": 0.1003167898627244, "grad_norm": 2.678637742996216, "learning_rate": 1.3819539728095802e-05, "loss": 0.795, "step": 1425 }, { "epoch": 0.10038718760999649, "grad_norm": 2.6219325065612793, "learning_rate": 1.381917946892703e-05, "loss": 0.7864, "step": 1426 }, { "epoch": 0.10045758535726856, "grad_norm": 2.3786487579345703, "learning_rate": 1.3818818855222944e-05, "loss": 0.7588, "step": 1427 }, { "epoch": 0.10052798310454066, "grad_norm": 2.474106788635254, "learning_rate": 1.3818457887002286e-05, "loss": 0.701, "step": 1428 }, { "epoch": 0.10059838085181275, "grad_norm": 2.1290125846862793, "learning_rate": 1.381809656428383e-05, "loss": 0.8068, "step": 1429 }, { "epoch": 0.10066877859908482, "grad_norm": 3.7950210571289062, "learning_rate": 1.3817734887086358e-05, "loss": 0.7795, "step": 1430 }, { "epoch": 0.10073917634635692, "grad_norm": 2.3376305103302, "learning_rate": 1.3817372855428678e-05, "loss": 0.8429, "step": 1431 }, { "epoch": 0.100809574093629, "grad_norm": 2.2078070640563965, "learning_rate": 1.3817010469329607e-05, "loss": 0.7091, "step": 1432 }, { "epoch": 0.1008799718409011, "grad_norm": 2.5158956050872803, "learning_rate": 1.3816647728807986e-05, "loss": 0.7847, "step": 1433 }, { "epoch": 0.10095036958817317, "grad_norm": 3.1038427352905273, "learning_rate": 1.3816284633882678e-05, "loss": 0.7041, "step": 1434 }, { "epoch": 0.10102076733544527, "grad_norm": 2.0924556255340576, "learning_rate": 1.3815921184572557e-05, "loss": 0.7322, "step": 1435 }, { "epoch": 0.10109116508271736, "grad_norm": 2.1860690116882324, "learning_rate": 1.3815557380896521e-05, "loss": 0.7041, "step": 1436 }, { "epoch": 0.10116156282998943, "grad_norm": 2.4287257194519043, "learning_rate": 1.3815193222873485e-05, "loss": 0.7524, "step": 1437 }, { "epoch": 0.10123196057726153, "grad_norm": 2.60332989692688, "learning_rate": 1.3814828710522382e-05, "loss": 0.8885, "step": 1438 }, { "epoch": 0.10130235832453362, "grad_norm": 2.408057451248169, "learning_rate": 1.381446384386216e-05, "loss": 0.7395, "step": 1439 }, { "epoch": 0.10137275607180571, "grad_norm": 2.122253894805908, "learning_rate": 1.3814098622911794e-05, "loss": 0.827, "step": 1440 }, { "epoch": 0.10144315381907779, "grad_norm": 2.693152666091919, "learning_rate": 1.3813733047690267e-05, "loss": 0.8107, "step": 1441 }, { "epoch": 0.10151355156634988, "grad_norm": 2.1799581050872803, "learning_rate": 1.3813367118216589e-05, "loss": 0.7079, "step": 1442 }, { "epoch": 0.10158394931362197, "grad_norm": 2.427865982055664, "learning_rate": 1.3813000834509785e-05, "loss": 0.7561, "step": 1443 }, { "epoch": 0.10165434706089405, "grad_norm": 2.6954433917999268, "learning_rate": 1.3812634196588895e-05, "loss": 0.7484, "step": 1444 }, { "epoch": 0.10172474480816614, "grad_norm": 2.0755398273468018, "learning_rate": 1.3812267204472986e-05, "loss": 0.9217, "step": 1445 }, { "epoch": 0.10179514255543823, "grad_norm": 2.3625447750091553, "learning_rate": 1.3811899858181134e-05, "loss": 0.8219, "step": 1446 }, { "epoch": 0.10186554030271032, "grad_norm": 3.6413323879241943, "learning_rate": 1.381153215773244e-05, "loss": 0.7096, "step": 1447 }, { "epoch": 0.1019359380499824, "grad_norm": 2.14170241355896, "learning_rate": 1.3811164103146018e-05, "loss": 0.818, "step": 1448 }, { "epoch": 0.10200633579725449, "grad_norm": 2.2434394359588623, "learning_rate": 1.3810795694441008e-05, "loss": 0.7065, "step": 1449 }, { "epoch": 0.10207673354452658, "grad_norm": 2.6800942420959473, "learning_rate": 1.3810426931636562e-05, "loss": 0.8615, "step": 1450 }, { "epoch": 0.10214713129179866, "grad_norm": 1.9535430669784546, "learning_rate": 1.3810057814751848e-05, "loss": 0.7508, "step": 1451 }, { "epoch": 0.10221752903907075, "grad_norm": 2.622714042663574, "learning_rate": 1.3809688343806065e-05, "loss": 0.9419, "step": 1452 }, { "epoch": 0.10228792678634284, "grad_norm": 2.1764702796936035, "learning_rate": 1.3809318518818418e-05, "loss": 0.724, "step": 1453 }, { "epoch": 0.10235832453361493, "grad_norm": 15.88931655883789, "learning_rate": 1.380894833980813e-05, "loss": 0.9554, "step": 1454 }, { "epoch": 0.10242872228088701, "grad_norm": 2.414395570755005, "learning_rate": 1.3808577806794456e-05, "loss": 0.8448, "step": 1455 }, { "epoch": 0.1024991200281591, "grad_norm": 2.182896614074707, "learning_rate": 1.3808206919796653e-05, "loss": 0.7506, "step": 1456 }, { "epoch": 0.10256951777543119, "grad_norm": 1.814693808555603, "learning_rate": 1.3807835678834006e-05, "loss": 0.7709, "step": 1457 }, { "epoch": 0.10263991552270327, "grad_norm": 2.3296077251434326, "learning_rate": 1.3807464083925818e-05, "loss": 0.8634, "step": 1458 }, { "epoch": 0.10271031326997536, "grad_norm": 2.0389816761016846, "learning_rate": 1.3807092135091406e-05, "loss": 0.8545, "step": 1459 }, { "epoch": 0.10278071101724745, "grad_norm": 2.178295373916626, "learning_rate": 1.380671983235011e-05, "loss": 0.675, "step": 1460 }, { "epoch": 0.10285110876451954, "grad_norm": 2.246753215789795, "learning_rate": 1.3806347175721283e-05, "loss": 0.8762, "step": 1461 }, { "epoch": 0.10292150651179162, "grad_norm": 2.3613970279693604, "learning_rate": 1.3805974165224307e-05, "loss": 0.8966, "step": 1462 }, { "epoch": 0.10299190425906371, "grad_norm": 2.319946050643921, "learning_rate": 1.3805600800878565e-05, "loss": 0.7722, "step": 1463 }, { "epoch": 0.1030623020063358, "grad_norm": 1.9336450099945068, "learning_rate": 1.3805227082703475e-05, "loss": 0.7318, "step": 1464 }, { "epoch": 0.10313269975360788, "grad_norm": 2.513247489929199, "learning_rate": 1.3804853010718469e-05, "loss": 0.9151, "step": 1465 }, { "epoch": 0.10320309750087997, "grad_norm": 2.2665843963623047, "learning_rate": 1.3804478584942989e-05, "loss": 0.7256, "step": 1466 }, { "epoch": 0.10327349524815206, "grad_norm": 2.424797534942627, "learning_rate": 1.3804103805396506e-05, "loss": 0.863, "step": 1467 }, { "epoch": 0.10334389299542415, "grad_norm": 2.3860042095184326, "learning_rate": 1.3803728672098505e-05, "loss": 0.7132, "step": 1468 }, { "epoch": 0.10341429074269623, "grad_norm": 2.0085227489471436, "learning_rate": 1.3803353185068488e-05, "loss": 0.7155, "step": 1469 }, { "epoch": 0.10348468848996832, "grad_norm": 2.5789105892181396, "learning_rate": 1.3802977344325976e-05, "loss": 0.8819, "step": 1470 }, { "epoch": 0.10355508623724041, "grad_norm": 1.8085596561431885, "learning_rate": 1.3802601149890512e-05, "loss": 0.73, "step": 1471 }, { "epoch": 0.10362548398451249, "grad_norm": 2.3451921939849854, "learning_rate": 1.3802224601781653e-05, "loss": 0.7507, "step": 1472 }, { "epoch": 0.10369588173178458, "grad_norm": 2.8347837924957275, "learning_rate": 1.3801847700018977e-05, "loss": 0.8573, "step": 1473 }, { "epoch": 0.10376627947905667, "grad_norm": 2.3351364135742188, "learning_rate": 1.380147044462208e-05, "loss": 0.6663, "step": 1474 }, { "epoch": 0.10383667722632876, "grad_norm": 2.3278415203094482, "learning_rate": 1.3801092835610571e-05, "loss": 0.758, "step": 1475 }, { "epoch": 0.10390707497360084, "grad_norm": 2.6337335109710693, "learning_rate": 1.3800714873004087e-05, "loss": 0.8577, "step": 1476 }, { "epoch": 0.10397747272087293, "grad_norm": 2.395157814025879, "learning_rate": 1.380033655682228e-05, "loss": 0.8075, "step": 1477 }, { "epoch": 0.10404787046814502, "grad_norm": 2.215142011642456, "learning_rate": 1.3799957887084816e-05, "loss": 0.7872, "step": 1478 }, { "epoch": 0.1041182682154171, "grad_norm": 3.155498504638672, "learning_rate": 1.3799578863811383e-05, "loss": 0.7143, "step": 1479 }, { "epoch": 0.10418866596268919, "grad_norm": 2.5961904525756836, "learning_rate": 1.3799199487021687e-05, "loss": 0.737, "step": 1480 }, { "epoch": 0.10425906370996128, "grad_norm": 2.1150755882263184, "learning_rate": 1.379881975673545e-05, "loss": 0.7873, "step": 1481 }, { "epoch": 0.10432946145723337, "grad_norm": 2.1122090816497803, "learning_rate": 1.3798439672972418e-05, "loss": 0.7582, "step": 1482 }, { "epoch": 0.10439985920450545, "grad_norm": 2.2658681869506836, "learning_rate": 1.3798059235752351e-05, "loss": 0.7638, "step": 1483 }, { "epoch": 0.10447025695177754, "grad_norm": 2.445270299911499, "learning_rate": 1.3797678445095027e-05, "loss": 0.9052, "step": 1484 }, { "epoch": 0.10454065469904963, "grad_norm": 3.452242612838745, "learning_rate": 1.3797297301020245e-05, "loss": 0.711, "step": 1485 }, { "epoch": 0.10461105244632171, "grad_norm": 5.248964309692383, "learning_rate": 1.379691580354782e-05, "loss": 0.8683, "step": 1486 }, { "epoch": 0.1046814501935938, "grad_norm": 2.1563470363616943, "learning_rate": 1.3796533952697584e-05, "loss": 0.7775, "step": 1487 }, { "epoch": 0.1047518479408659, "grad_norm": 2.463484048843384, "learning_rate": 1.3796151748489396e-05, "loss": 0.8441, "step": 1488 }, { "epoch": 0.10482224568813799, "grad_norm": 2.6832313537597656, "learning_rate": 1.3795769190943122e-05, "loss": 0.6716, "step": 1489 }, { "epoch": 0.10489264343541006, "grad_norm": 2.9963252544403076, "learning_rate": 1.3795386280078654e-05, "loss": 0.7348, "step": 1490 }, { "epoch": 0.10496304118268215, "grad_norm": 2.4201712608337402, "learning_rate": 1.3795003015915899e-05, "loss": 0.6824, "step": 1491 }, { "epoch": 0.10503343892995425, "grad_norm": 2.5559449195861816, "learning_rate": 1.3794619398474783e-05, "loss": 0.798, "step": 1492 }, { "epoch": 0.10510383667722632, "grad_norm": 2.4412336349487305, "learning_rate": 1.3794235427775252e-05, "loss": 0.8827, "step": 1493 }, { "epoch": 0.10517423442449841, "grad_norm": 6.1311469078063965, "learning_rate": 1.3793851103837264e-05, "loss": 0.8787, "step": 1494 }, { "epoch": 0.1052446321717705, "grad_norm": 2.130945920944214, "learning_rate": 1.3793466426680807e-05, "loss": 0.8718, "step": 1495 }, { "epoch": 0.1053150299190426, "grad_norm": 2.796344757080078, "learning_rate": 1.379308139632588e-05, "loss": 0.7159, "step": 1496 }, { "epoch": 0.10538542766631467, "grad_norm": 2.3017640113830566, "learning_rate": 1.3792696012792496e-05, "loss": 0.8439, "step": 1497 }, { "epoch": 0.10545582541358677, "grad_norm": 1.9842846393585205, "learning_rate": 1.3792310276100697e-05, "loss": 0.7138, "step": 1498 }, { "epoch": 0.10552622316085886, "grad_norm": 2.2421340942382812, "learning_rate": 1.3791924186270532e-05, "loss": 0.7605, "step": 1499 }, { "epoch": 0.10559662090813093, "grad_norm": 2.492703914642334, "learning_rate": 1.3791537743322079e-05, "loss": 0.6555, "step": 1500 }, { "epoch": 0.10566701865540302, "grad_norm": 2.4917867183685303, "learning_rate": 1.3791150947275428e-05, "loss": 0.8293, "step": 1501 }, { "epoch": 0.10573741640267512, "grad_norm": 2.1704835891723633, "learning_rate": 1.3790763798150688e-05, "loss": 0.6777, "step": 1502 }, { "epoch": 0.10580781414994721, "grad_norm": 4.156985282897949, "learning_rate": 1.379037629596799e-05, "loss": 0.7612, "step": 1503 }, { "epoch": 0.10587821189721928, "grad_norm": 2.530759334564209, "learning_rate": 1.3789988440747477e-05, "loss": 0.6595, "step": 1504 }, { "epoch": 0.10594860964449138, "grad_norm": 2.3515915870666504, "learning_rate": 1.3789600232509315e-05, "loss": 0.7781, "step": 1505 }, { "epoch": 0.10601900739176347, "grad_norm": 2.176265239715576, "learning_rate": 1.378921167127369e-05, "loss": 0.729, "step": 1506 }, { "epoch": 0.10608940513903554, "grad_norm": 2.4852330684661865, "learning_rate": 1.37888227570608e-05, "loss": 0.8698, "step": 1507 }, { "epoch": 0.10615980288630764, "grad_norm": 2.5232508182525635, "learning_rate": 1.3788433489890868e-05, "loss": 0.8116, "step": 1508 }, { "epoch": 0.10623020063357973, "grad_norm": 2.1038448810577393, "learning_rate": 1.378804386978413e-05, "loss": 0.8661, "step": 1509 }, { "epoch": 0.10630059838085182, "grad_norm": 2.3320393562316895, "learning_rate": 1.3787653896760842e-05, "loss": 0.7679, "step": 1510 }, { "epoch": 0.1063709961281239, "grad_norm": 2.31546950340271, "learning_rate": 1.3787263570841283e-05, "loss": 0.6911, "step": 1511 }, { "epoch": 0.10644139387539599, "grad_norm": 2.821732759475708, "learning_rate": 1.3786872892045744e-05, "loss": 0.808, "step": 1512 }, { "epoch": 0.10651179162266808, "grad_norm": 2.292588233947754, "learning_rate": 1.3786481860394535e-05, "loss": 0.8586, "step": 1513 }, { "epoch": 0.10658218936994016, "grad_norm": 2.3611974716186523, "learning_rate": 1.378609047590799e-05, "loss": 0.7076, "step": 1514 }, { "epoch": 0.10665258711721225, "grad_norm": 2.450965404510498, "learning_rate": 1.3785698738606454e-05, "loss": 0.8565, "step": 1515 }, { "epoch": 0.10672298486448434, "grad_norm": 2.2162728309631348, "learning_rate": 1.3785306648510295e-05, "loss": 0.718, "step": 1516 }, { "epoch": 0.10679338261175643, "grad_norm": 1.9335877895355225, "learning_rate": 1.37849142056399e-05, "loss": 0.8567, "step": 1517 }, { "epoch": 0.1068637803590285, "grad_norm": 2.0152080059051514, "learning_rate": 1.3784521410015669e-05, "loss": 0.6725, "step": 1518 }, { "epoch": 0.1069341781063006, "grad_norm": 2.3049638271331787, "learning_rate": 1.3784128261658024e-05, "loss": 0.821, "step": 1519 }, { "epoch": 0.10700457585357269, "grad_norm": 2.1999385356903076, "learning_rate": 1.3783734760587409e-05, "loss": 0.7963, "step": 1520 }, { "epoch": 0.10707497360084477, "grad_norm": 2.3560354709625244, "learning_rate": 1.378334090682428e-05, "loss": 0.8099, "step": 1521 }, { "epoch": 0.10714537134811686, "grad_norm": 2.474320650100708, "learning_rate": 1.3782946700389112e-05, "loss": 0.7399, "step": 1522 }, { "epoch": 0.10721576909538895, "grad_norm": 2.6907835006713867, "learning_rate": 1.3782552141302403e-05, "loss": 0.845, "step": 1523 }, { "epoch": 0.10728616684266104, "grad_norm": 2.3242878913879395, "learning_rate": 1.3782157229584666e-05, "loss": 0.8766, "step": 1524 }, { "epoch": 0.10735656458993312, "grad_norm": 2.5410642623901367, "learning_rate": 1.3781761965256433e-05, "loss": 0.8534, "step": 1525 }, { "epoch": 0.10742696233720521, "grad_norm": 2.1979005336761475, "learning_rate": 1.3781366348338253e-05, "loss": 0.7058, "step": 1526 }, { "epoch": 0.1074973600844773, "grad_norm": 2.142456531524658, "learning_rate": 1.3780970378850695e-05, "loss": 0.6504, "step": 1527 }, { "epoch": 0.10756775783174938, "grad_norm": 2.778135299682617, "learning_rate": 1.3780574056814344e-05, "loss": 0.8209, "step": 1528 }, { "epoch": 0.10763815557902147, "grad_norm": 2.399174928665161, "learning_rate": 1.378017738224981e-05, "loss": 0.8494, "step": 1529 }, { "epoch": 0.10770855332629356, "grad_norm": 2.568596839904785, "learning_rate": 1.3779780355177712e-05, "loss": 0.7679, "step": 1530 }, { "epoch": 0.10777895107356565, "grad_norm": 2.2742397785186768, "learning_rate": 1.3779382975618693e-05, "loss": 0.7192, "step": 1531 }, { "epoch": 0.10784934882083773, "grad_norm": 2.569723129272461, "learning_rate": 1.3778985243593413e-05, "loss": 0.7805, "step": 1532 }, { "epoch": 0.10791974656810982, "grad_norm": 2.9943783283233643, "learning_rate": 1.377858715912255e-05, "loss": 0.7614, "step": 1533 }, { "epoch": 0.10799014431538191, "grad_norm": 1.8934288024902344, "learning_rate": 1.3778188722226803e-05, "loss": 0.7107, "step": 1534 }, { "epoch": 0.10806054206265399, "grad_norm": 1.9892622232437134, "learning_rate": 1.3777789932926884e-05, "loss": 0.8463, "step": 1535 }, { "epoch": 0.10813093980992608, "grad_norm": 2.344217300415039, "learning_rate": 1.3777390791243531e-05, "loss": 0.7441, "step": 1536 }, { "epoch": 0.10820133755719817, "grad_norm": 2.1213929653167725, "learning_rate": 1.3776991297197492e-05, "loss": 0.7516, "step": 1537 }, { "epoch": 0.10827173530447026, "grad_norm": 2.3346328735351562, "learning_rate": 1.3776591450809535e-05, "loss": 0.795, "step": 1538 }, { "epoch": 0.10834213305174234, "grad_norm": 2.819348096847534, "learning_rate": 1.3776191252100455e-05, "loss": 0.7661, "step": 1539 }, { "epoch": 0.10841253079901443, "grad_norm": 2.0938241481781006, "learning_rate": 1.3775790701091053e-05, "loss": 0.6065, "step": 1540 }, { "epoch": 0.10848292854628652, "grad_norm": 2.23346209526062, "learning_rate": 1.3775389797802154e-05, "loss": 0.7844, "step": 1541 }, { "epoch": 0.1085533262935586, "grad_norm": 2.686587333679199, "learning_rate": 1.3774988542254607e-05, "loss": 0.8275, "step": 1542 }, { "epoch": 0.10862372404083069, "grad_norm": 2.984802484512329, "learning_rate": 1.3774586934469269e-05, "loss": 0.7932, "step": 1543 }, { "epoch": 0.10869412178810278, "grad_norm": 2.589205503463745, "learning_rate": 1.377418497446702e-05, "loss": 0.8327, "step": 1544 }, { "epoch": 0.10876451953537487, "grad_norm": 2.5902462005615234, "learning_rate": 1.3773782662268759e-05, "loss": 0.6885, "step": 1545 }, { "epoch": 0.10883491728264695, "grad_norm": 2.215656042098999, "learning_rate": 1.3773379997895402e-05, "loss": 0.7258, "step": 1546 }, { "epoch": 0.10890531502991904, "grad_norm": 2.6093668937683105, "learning_rate": 1.3772976981367885e-05, "loss": 0.7529, "step": 1547 }, { "epoch": 0.10897571277719113, "grad_norm": 2.5215015411376953, "learning_rate": 1.3772573612707161e-05, "loss": 0.8452, "step": 1548 }, { "epoch": 0.10904611052446321, "grad_norm": 2.0720107555389404, "learning_rate": 1.3772169891934201e-05, "loss": 0.7325, "step": 1549 }, { "epoch": 0.1091165082717353, "grad_norm": 2.2003824710845947, "learning_rate": 1.3771765819069998e-05, "loss": 0.8077, "step": 1550 }, { "epoch": 0.1091869060190074, "grad_norm": 1.852858543395996, "learning_rate": 1.3771361394135554e-05, "loss": 0.9487, "step": 1551 }, { "epoch": 0.10925730376627948, "grad_norm": 2.7708635330200195, "learning_rate": 1.3770956617151899e-05, "loss": 0.7743, "step": 1552 }, { "epoch": 0.10932770151355156, "grad_norm": 2.2521536350250244, "learning_rate": 1.3770551488140079e-05, "loss": 0.8309, "step": 1553 }, { "epoch": 0.10939809926082365, "grad_norm": 2.516474962234497, "learning_rate": 1.3770146007121153e-05, "loss": 0.7085, "step": 1554 }, { "epoch": 0.10946849700809574, "grad_norm": 2.722050428390503, "learning_rate": 1.3769740174116205e-05, "loss": 0.7666, "step": 1555 }, { "epoch": 0.10953889475536782, "grad_norm": 2.39557147026062, "learning_rate": 1.3769333989146335e-05, "loss": 0.9074, "step": 1556 }, { "epoch": 0.10960929250263991, "grad_norm": 2.4584810733795166, "learning_rate": 1.3768927452232663e-05, "loss": 0.7519, "step": 1557 }, { "epoch": 0.109679690249912, "grad_norm": 1.8734687566757202, "learning_rate": 1.3768520563396318e-05, "loss": 0.7593, "step": 1558 }, { "epoch": 0.1097500879971841, "grad_norm": 2.6431589126586914, "learning_rate": 1.376811332265846e-05, "loss": 0.8385, "step": 1559 }, { "epoch": 0.10982048574445617, "grad_norm": 2.065969705581665, "learning_rate": 1.3767705730040263e-05, "loss": 0.6954, "step": 1560 }, { "epoch": 0.10989088349172826, "grad_norm": 2.441608190536499, "learning_rate": 1.3767297785562914e-05, "loss": 0.6235, "step": 1561 }, { "epoch": 0.10996128123900036, "grad_norm": 2.069265842437744, "learning_rate": 1.3766889489247625e-05, "loss": 0.8317, "step": 1562 }, { "epoch": 0.11003167898627243, "grad_norm": 2.5596110820770264, "learning_rate": 1.3766480841115622e-05, "loss": 0.7313, "step": 1563 }, { "epoch": 0.11010207673354452, "grad_norm": 2.2286272048950195, "learning_rate": 1.3766071841188153e-05, "loss": 0.8558, "step": 1564 }, { "epoch": 0.11017247448081662, "grad_norm": 2.110929012298584, "learning_rate": 1.3765662489486481e-05, "loss": 0.7445, "step": 1565 }, { "epoch": 0.1102428722280887, "grad_norm": 2.2707436084747314, "learning_rate": 1.376525278603189e-05, "loss": 0.8032, "step": 1566 }, { "epoch": 0.11031326997536078, "grad_norm": 2.361915349960327, "learning_rate": 1.3764842730845677e-05, "loss": 0.7057, "step": 1567 }, { "epoch": 0.11038366772263287, "grad_norm": 2.2197439670562744, "learning_rate": 1.3764432323949166e-05, "loss": 0.751, "step": 1568 }, { "epoch": 0.11045406546990497, "grad_norm": 2.6382789611816406, "learning_rate": 1.376402156536369e-05, "loss": 0.7405, "step": 1569 }, { "epoch": 0.11052446321717704, "grad_norm": 2.2375335693359375, "learning_rate": 1.376361045511061e-05, "loss": 0.751, "step": 1570 }, { "epoch": 0.11059486096444913, "grad_norm": 2.525139093399048, "learning_rate": 1.3763198993211295e-05, "loss": 0.7353, "step": 1571 }, { "epoch": 0.11066525871172123, "grad_norm": 2.719688892364502, "learning_rate": 1.376278717968714e-05, "loss": 0.7547, "step": 1572 }, { "epoch": 0.11073565645899332, "grad_norm": 3.9968318939208984, "learning_rate": 1.3762375014559555e-05, "loss": 0.813, "step": 1573 }, { "epoch": 0.1108060542062654, "grad_norm": 1.9999995231628418, "learning_rate": 1.3761962497849967e-05, "loss": 0.7629, "step": 1574 }, { "epoch": 0.11087645195353749, "grad_norm": 2.2618777751922607, "learning_rate": 1.3761549629579826e-05, "loss": 0.8747, "step": 1575 }, { "epoch": 0.11094684970080958, "grad_norm": 2.2160682678222656, "learning_rate": 1.3761136409770596e-05, "loss": 0.7841, "step": 1576 }, { "epoch": 0.11101724744808165, "grad_norm": 2.665341854095459, "learning_rate": 1.376072283844376e-05, "loss": 0.8087, "step": 1577 }, { "epoch": 0.11108764519535375, "grad_norm": 2.1624488830566406, "learning_rate": 1.3760308915620823e-05, "loss": 0.7127, "step": 1578 }, { "epoch": 0.11115804294262584, "grad_norm": 2.41340970993042, "learning_rate": 1.37598946413233e-05, "loss": 0.7059, "step": 1579 }, { "epoch": 0.11122844068989793, "grad_norm": 2.4131994247436523, "learning_rate": 1.3759480015572736e-05, "loss": 0.7686, "step": 1580 }, { "epoch": 0.11129883843717, "grad_norm": 2.199885129928589, "learning_rate": 1.3759065038390682e-05, "loss": 0.934, "step": 1581 }, { "epoch": 0.1113692361844421, "grad_norm": 2.820788621902466, "learning_rate": 1.3758649709798714e-05, "loss": 0.8383, "step": 1582 }, { "epoch": 0.11143963393171419, "grad_norm": 2.6545708179473877, "learning_rate": 1.3758234029818429e-05, "loss": 0.7904, "step": 1583 }, { "epoch": 0.11151003167898627, "grad_norm": 2.1028177738189697, "learning_rate": 1.3757817998471434e-05, "loss": 0.7631, "step": 1584 }, { "epoch": 0.11158042942625836, "grad_norm": 2.7426469326019287, "learning_rate": 1.3757401615779364e-05, "loss": 0.8399, "step": 1585 }, { "epoch": 0.11165082717353045, "grad_norm": 2.337364435195923, "learning_rate": 1.3756984881763861e-05, "loss": 0.692, "step": 1586 }, { "epoch": 0.11172122492080254, "grad_norm": 3.7064905166625977, "learning_rate": 1.3756567796446597e-05, "loss": 0.7916, "step": 1587 }, { "epoch": 0.11179162266807462, "grad_norm": 2.885540246963501, "learning_rate": 1.3756150359849253e-05, "loss": 0.7648, "step": 1588 }, { "epoch": 0.11186202041534671, "grad_norm": 2.3998348712921143, "learning_rate": 1.3755732571993531e-05, "loss": 0.726, "step": 1589 }, { "epoch": 0.1119324181626188, "grad_norm": 2.584264039993286, "learning_rate": 1.3755314432901158e-05, "loss": 0.7897, "step": 1590 }, { "epoch": 0.11200281590989089, "grad_norm": 2.650606632232666, "learning_rate": 1.3754895942593868e-05, "loss": 0.6567, "step": 1591 }, { "epoch": 0.11207321365716297, "grad_norm": 2.9833645820617676, "learning_rate": 1.3754477101093422e-05, "loss": 0.7043, "step": 1592 }, { "epoch": 0.11214361140443506, "grad_norm": 2.1243815422058105, "learning_rate": 1.375405790842159e-05, "loss": 0.7933, "step": 1593 }, { "epoch": 0.11221400915170715, "grad_norm": 2.1083154678344727, "learning_rate": 1.3753638364600176e-05, "loss": 0.6333, "step": 1594 }, { "epoch": 0.11228440689897923, "grad_norm": 2.7171194553375244, "learning_rate": 1.3753218469650982e-05, "loss": 0.653, "step": 1595 }, { "epoch": 0.11235480464625132, "grad_norm": 2.327781915664673, "learning_rate": 1.3752798223595845e-05, "loss": 0.7474, "step": 1596 }, { "epoch": 0.11242520239352341, "grad_norm": 2.116194725036621, "learning_rate": 1.3752377626456616e-05, "loss": 0.7419, "step": 1597 }, { "epoch": 0.1124956001407955, "grad_norm": 2.2473554611206055, "learning_rate": 1.3751956678255156e-05, "loss": 0.7368, "step": 1598 }, { "epoch": 0.11256599788806758, "grad_norm": 2.37587308883667, "learning_rate": 1.3751535379013354e-05, "loss": 0.7844, "step": 1599 }, { "epoch": 0.11263639563533967, "grad_norm": 2.624990463256836, "learning_rate": 1.3751113728753112e-05, "loss": 0.9343, "step": 1600 }, { "epoch": 0.11270679338261176, "grad_norm": 2.6170895099639893, "learning_rate": 1.3750691727496353e-05, "loss": 0.7902, "step": 1601 }, { "epoch": 0.11277719112988384, "grad_norm": 2.4804720878601074, "learning_rate": 1.3750269375265021e-05, "loss": 0.7791, "step": 1602 }, { "epoch": 0.11284758887715593, "grad_norm": 2.3533356189727783, "learning_rate": 1.3749846672081068e-05, "loss": 0.7054, "step": 1603 }, { "epoch": 0.11291798662442802, "grad_norm": 2.214348316192627, "learning_rate": 1.3749423617966472e-05, "loss": 0.668, "step": 1604 }, { "epoch": 0.11298838437170011, "grad_norm": 3.3927958011627197, "learning_rate": 1.374900021294323e-05, "loss": 0.7077, "step": 1605 }, { "epoch": 0.11305878211897219, "grad_norm": 2.6119022369384766, "learning_rate": 1.3748576457033356e-05, "loss": 0.78, "step": 1606 }, { "epoch": 0.11312917986624428, "grad_norm": 2.0686051845550537, "learning_rate": 1.3748152350258881e-05, "loss": 0.9245, "step": 1607 }, { "epoch": 0.11319957761351637, "grad_norm": 1.9094802141189575, "learning_rate": 1.3747727892641852e-05, "loss": 0.824, "step": 1608 }, { "epoch": 0.11326997536078845, "grad_norm": 2.0125341415405273, "learning_rate": 1.374730308420434e-05, "loss": 0.7698, "step": 1609 }, { "epoch": 0.11334037310806054, "grad_norm": 2.095142126083374, "learning_rate": 1.374687792496843e-05, "loss": 0.8276, "step": 1610 }, { "epoch": 0.11341077085533263, "grad_norm": 1.9614841938018799, "learning_rate": 1.3746452414956225e-05, "loss": 0.7378, "step": 1611 }, { "epoch": 0.11348116860260472, "grad_norm": 2.2347846031188965, "learning_rate": 1.3746026554189849e-05, "loss": 0.7507, "step": 1612 }, { "epoch": 0.1135515663498768, "grad_norm": 3.453697681427002, "learning_rate": 1.3745600342691444e-05, "loss": 0.7648, "step": 1613 }, { "epoch": 0.11362196409714889, "grad_norm": 2.3130042552948, "learning_rate": 1.3745173780483166e-05, "loss": 0.7792, "step": 1614 }, { "epoch": 0.11369236184442098, "grad_norm": 2.4547555446624756, "learning_rate": 1.3744746867587196e-05, "loss": 0.6616, "step": 1615 }, { "epoch": 0.11376275959169306, "grad_norm": 2.9933316707611084, "learning_rate": 1.3744319604025727e-05, "loss": 0.8676, "step": 1616 }, { "epoch": 0.11383315733896515, "grad_norm": 2.3520171642303467, "learning_rate": 1.3743891989820975e-05, "loss": 0.7445, "step": 1617 }, { "epoch": 0.11390355508623724, "grad_norm": 2.0795040130615234, "learning_rate": 1.3743464024995171e-05, "loss": 0.8499, "step": 1618 }, { "epoch": 0.11397395283350933, "grad_norm": 1.9858736991882324, "learning_rate": 1.3743035709570565e-05, "loss": 0.7188, "step": 1619 }, { "epoch": 0.11404435058078141, "grad_norm": 2.459197998046875, "learning_rate": 1.3742607043569425e-05, "loss": 0.708, "step": 1620 }, { "epoch": 0.1141147483280535, "grad_norm": 2.6868722438812256, "learning_rate": 1.3742178027014039e-05, "loss": 0.8647, "step": 1621 }, { "epoch": 0.1141851460753256, "grad_norm": 2.1901774406433105, "learning_rate": 1.374174865992671e-05, "loss": 0.8568, "step": 1622 }, { "epoch": 0.11425554382259767, "grad_norm": 5.0182037353515625, "learning_rate": 1.3741318942329764e-05, "loss": 0.8051, "step": 1623 }, { "epoch": 0.11432594156986976, "grad_norm": 2.520148515701294, "learning_rate": 1.374088887424554e-05, "loss": 0.7025, "step": 1624 }, { "epoch": 0.11439633931714185, "grad_norm": 1.9115002155303955, "learning_rate": 1.3740458455696399e-05, "loss": 0.7787, "step": 1625 }, { "epoch": 0.11446673706441395, "grad_norm": 2.243359327316284, "learning_rate": 1.3740027686704717e-05, "loss": 0.7948, "step": 1626 }, { "epoch": 0.11453713481168602, "grad_norm": 2.48187255859375, "learning_rate": 1.3739596567292895e-05, "loss": 0.6856, "step": 1627 }, { "epoch": 0.11460753255895811, "grad_norm": 2.1901326179504395, "learning_rate": 1.373916509748334e-05, "loss": 0.8855, "step": 1628 }, { "epoch": 0.1146779303062302, "grad_norm": 2.011791467666626, "learning_rate": 1.373873327729849e-05, "loss": 0.6408, "step": 1629 }, { "epoch": 0.11474832805350228, "grad_norm": 2.9995710849761963, "learning_rate": 1.3738301106760791e-05, "loss": 0.9366, "step": 1630 }, { "epoch": 0.11481872580077437, "grad_norm": 2.4127495288848877, "learning_rate": 1.373786858589272e-05, "loss": 0.8328, "step": 1631 }, { "epoch": 0.11488912354804647, "grad_norm": 2.3442323207855225, "learning_rate": 1.3737435714716754e-05, "loss": 0.7791, "step": 1632 }, { "epoch": 0.11495952129531856, "grad_norm": 1.901787519454956, "learning_rate": 1.3737002493255408e-05, "loss": 0.7827, "step": 1633 }, { "epoch": 0.11502991904259063, "grad_norm": 2.2320051193237305, "learning_rate": 1.3736568921531198e-05, "loss": 0.7933, "step": 1634 }, { "epoch": 0.11510031678986272, "grad_norm": 2.824383497238159, "learning_rate": 1.3736134999566668e-05, "loss": 0.6985, "step": 1635 }, { "epoch": 0.11517071453713482, "grad_norm": 2.056641101837158, "learning_rate": 1.3735700727384381e-05, "loss": 0.6575, "step": 1636 }, { "epoch": 0.1152411122844069, "grad_norm": 1.8938333988189697, "learning_rate": 1.3735266105006912e-05, "loss": 0.7672, "step": 1637 }, { "epoch": 0.11531151003167898, "grad_norm": 2.126490354537964, "learning_rate": 1.373483113245686e-05, "loss": 0.7045, "step": 1638 }, { "epoch": 0.11538190777895108, "grad_norm": 2.0757298469543457, "learning_rate": 1.3734395809756836e-05, "loss": 0.836, "step": 1639 }, { "epoch": 0.11545230552622317, "grad_norm": 2.2375311851501465, "learning_rate": 1.3733960136929474e-05, "loss": 0.7888, "step": 1640 }, { "epoch": 0.11552270327349524, "grad_norm": 1.913554310798645, "learning_rate": 1.3733524113997428e-05, "loss": 0.684, "step": 1641 }, { "epoch": 0.11559310102076734, "grad_norm": 2.55549693107605, "learning_rate": 1.3733087740983365e-05, "loss": 0.8235, "step": 1642 }, { "epoch": 0.11566349876803943, "grad_norm": 2.5618345737457275, "learning_rate": 1.373265101790997e-05, "loss": 0.8475, "step": 1643 }, { "epoch": 0.1157338965153115, "grad_norm": 2.6319262981414795, "learning_rate": 1.3732213944799956e-05, "loss": 0.7127, "step": 1644 }, { "epoch": 0.1158042942625836, "grad_norm": 2.0036065578460693, "learning_rate": 1.3731776521676038e-05, "loss": 0.7844, "step": 1645 }, { "epoch": 0.11587469200985569, "grad_norm": 5.534562110900879, "learning_rate": 1.3731338748560963e-05, "loss": 0.7247, "step": 1646 }, { "epoch": 0.11594508975712778, "grad_norm": 2.531480312347412, "learning_rate": 1.373090062547749e-05, "loss": 0.813, "step": 1647 }, { "epoch": 0.11601548750439986, "grad_norm": 2.298062324523926, "learning_rate": 1.3730462152448396e-05, "loss": 0.7269, "step": 1648 }, { "epoch": 0.11608588525167195, "grad_norm": 2.3988592624664307, "learning_rate": 1.373002332949648e-05, "loss": 0.771, "step": 1649 }, { "epoch": 0.11615628299894404, "grad_norm": 2.940329074859619, "learning_rate": 1.372958415664456e-05, "loss": 0.8466, "step": 1650 }, { "epoch": 0.11622668074621612, "grad_norm": 2.16636323928833, "learning_rate": 1.3729144633915462e-05, "loss": 0.8145, "step": 1651 }, { "epoch": 0.1162970784934882, "grad_norm": 1.6889487504959106, "learning_rate": 1.3728704761332041e-05, "loss": 0.8518, "step": 1652 }, { "epoch": 0.1163674762407603, "grad_norm": 2.1010234355926514, "learning_rate": 1.3728264538917164e-05, "loss": 0.6811, "step": 1653 }, { "epoch": 0.11643787398803239, "grad_norm": 2.281613349914551, "learning_rate": 1.372782396669372e-05, "loss": 0.7079, "step": 1654 }, { "epoch": 0.11650827173530447, "grad_norm": 2.1578688621520996, "learning_rate": 1.3727383044684617e-05, "loss": 0.8109, "step": 1655 }, { "epoch": 0.11657866948257656, "grad_norm": 1.805970311164856, "learning_rate": 1.3726941772912775e-05, "loss": 0.7356, "step": 1656 }, { "epoch": 0.11664906722984865, "grad_norm": 2.5441226959228516, "learning_rate": 1.3726500151401139e-05, "loss": 0.7993, "step": 1657 }, { "epoch": 0.11671946497712073, "grad_norm": 2.2186944484710693, "learning_rate": 1.3726058180172669e-05, "loss": 0.7953, "step": 1658 }, { "epoch": 0.11678986272439282, "grad_norm": 2.434722900390625, "learning_rate": 1.3725615859250341e-05, "loss": 0.7261, "step": 1659 }, { "epoch": 0.11686026047166491, "grad_norm": 2.1667368412017822, "learning_rate": 1.3725173188657154e-05, "loss": 0.7615, "step": 1660 }, { "epoch": 0.116930658218937, "grad_norm": 2.457986354827881, "learning_rate": 1.3724730168416121e-05, "loss": 0.6927, "step": 1661 }, { "epoch": 0.11700105596620908, "grad_norm": 7.772386074066162, "learning_rate": 1.3724286798550277e-05, "loss": 0.7993, "step": 1662 }, { "epoch": 0.11707145371348117, "grad_norm": 2.232818841934204, "learning_rate": 1.3723843079082675e-05, "loss": 0.7459, "step": 1663 }, { "epoch": 0.11714185146075326, "grad_norm": 3.3516647815704346, "learning_rate": 1.372339901003638e-05, "loss": 0.7055, "step": 1664 }, { "epoch": 0.11721224920802534, "grad_norm": 1.9879977703094482, "learning_rate": 1.372295459143448e-05, "loss": 0.74, "step": 1665 }, { "epoch": 0.11728264695529743, "grad_norm": 2.4007697105407715, "learning_rate": 1.3722509823300084e-05, "loss": 0.7927, "step": 1666 }, { "epoch": 0.11735304470256952, "grad_norm": 2.414088726043701, "learning_rate": 1.3722064705656313e-05, "loss": 0.8708, "step": 1667 }, { "epoch": 0.11742344244984161, "grad_norm": 2.484210968017578, "learning_rate": 1.372161923852631e-05, "loss": 0.7892, "step": 1668 }, { "epoch": 0.11749384019711369, "grad_norm": 2.1371328830718994, "learning_rate": 1.3721173421933238e-05, "loss": 0.7773, "step": 1669 }, { "epoch": 0.11756423794438578, "grad_norm": 1.9878405332565308, "learning_rate": 1.3720727255900269e-05, "loss": 0.9082, "step": 1670 }, { "epoch": 0.11763463569165787, "grad_norm": 2.158313751220703, "learning_rate": 1.3720280740450607e-05, "loss": 0.7632, "step": 1671 }, { "epoch": 0.11770503343892995, "grad_norm": 1.7978370189666748, "learning_rate": 1.3719833875607459e-05, "loss": 0.7664, "step": 1672 }, { "epoch": 0.11777543118620204, "grad_norm": 2.7071845531463623, "learning_rate": 1.3719386661394064e-05, "loss": 0.6922, "step": 1673 }, { "epoch": 0.11784582893347413, "grad_norm": 2.3808670043945312, "learning_rate": 1.3718939097833673e-05, "loss": 0.7241, "step": 1674 }, { "epoch": 0.11791622668074622, "grad_norm": 1.8843724727630615, "learning_rate": 1.3718491184949549e-05, "loss": 0.7683, "step": 1675 }, { "epoch": 0.1179866244280183, "grad_norm": 2.229771614074707, "learning_rate": 1.3718042922764987e-05, "loss": 0.8175, "step": 1676 }, { "epoch": 0.11805702217529039, "grad_norm": 2.474583864212036, "learning_rate": 1.3717594311303288e-05, "loss": 0.6857, "step": 1677 }, { "epoch": 0.11812741992256248, "grad_norm": 2.1939141750335693, "learning_rate": 1.3717145350587776e-05, "loss": 0.8505, "step": 1678 }, { "epoch": 0.11819781766983456, "grad_norm": 2.1721243858337402, "learning_rate": 1.3716696040641796e-05, "loss": 0.8009, "step": 1679 }, { "epoch": 0.11826821541710665, "grad_norm": 2.215710163116455, "learning_rate": 1.3716246381488703e-05, "loss": 0.8138, "step": 1680 }, { "epoch": 0.11833861316437874, "grad_norm": 2.1449291706085205, "learning_rate": 1.3715796373151882e-05, "loss": 0.8522, "step": 1681 }, { "epoch": 0.11840901091165083, "grad_norm": 2.0194499492645264, "learning_rate": 1.3715346015654722e-05, "loss": 0.7902, "step": 1682 }, { "epoch": 0.11847940865892291, "grad_norm": 2.434890031814575, "learning_rate": 1.3714895309020643e-05, "loss": 0.7413, "step": 1683 }, { "epoch": 0.118549806406195, "grad_norm": 1.8053114414215088, "learning_rate": 1.3714444253273072e-05, "loss": 0.7361, "step": 1684 }, { "epoch": 0.1186202041534671, "grad_norm": 2.92336368560791, "learning_rate": 1.3713992848435469e-05, "loss": 0.8394, "step": 1685 }, { "epoch": 0.11869060190073917, "grad_norm": 2.0443735122680664, "learning_rate": 1.3713541094531293e-05, "loss": 0.7837, "step": 1686 }, { "epoch": 0.11876099964801126, "grad_norm": 2.306548833847046, "learning_rate": 1.3713088991584036e-05, "loss": 0.739, "step": 1687 }, { "epoch": 0.11883139739528335, "grad_norm": 2.546522855758667, "learning_rate": 1.3712636539617203e-05, "loss": 0.694, "step": 1688 }, { "epoch": 0.11890179514255544, "grad_norm": 2.7044475078582764, "learning_rate": 1.3712183738654317e-05, "loss": 0.864, "step": 1689 }, { "epoch": 0.11897219288982752, "grad_norm": 2.1581878662109375, "learning_rate": 1.3711730588718918e-05, "loss": 0.7442, "step": 1690 }, { "epoch": 0.11904259063709961, "grad_norm": 2.402418613433838, "learning_rate": 1.371127708983457e-05, "loss": 0.823, "step": 1691 }, { "epoch": 0.1191129883843717, "grad_norm": 2.248704433441162, "learning_rate": 1.3710823242024847e-05, "loss": 0.8254, "step": 1692 }, { "epoch": 0.11918338613164378, "grad_norm": 2.4577865600585938, "learning_rate": 1.3710369045313347e-05, "loss": 0.7927, "step": 1693 }, { "epoch": 0.11925378387891587, "grad_norm": 3.4288063049316406, "learning_rate": 1.370991449972368e-05, "loss": 0.791, "step": 1694 }, { "epoch": 0.11932418162618796, "grad_norm": 2.7118136882781982, "learning_rate": 1.3709459605279484e-05, "loss": 0.7797, "step": 1695 }, { "epoch": 0.11939457937346006, "grad_norm": 2.2333576679229736, "learning_rate": 1.3709004362004405e-05, "loss": 0.7694, "step": 1696 }, { "epoch": 0.11946497712073213, "grad_norm": 2.468637466430664, "learning_rate": 1.3708548769922114e-05, "loss": 0.8167, "step": 1697 }, { "epoch": 0.11953537486800422, "grad_norm": 2.214554786682129, "learning_rate": 1.3708092829056297e-05, "loss": 0.77, "step": 1698 }, { "epoch": 0.11960577261527632, "grad_norm": 2.2729156017303467, "learning_rate": 1.3707636539430658e-05, "loss": 0.7675, "step": 1699 }, { "epoch": 0.11967617036254839, "grad_norm": 2.5307085514068604, "learning_rate": 1.3707179901068922e-05, "loss": 0.9011, "step": 1700 }, { "epoch": 0.11974656810982048, "grad_norm": 2.4170241355895996, "learning_rate": 1.3706722913994826e-05, "loss": 0.7173, "step": 1701 }, { "epoch": 0.11981696585709257, "grad_norm": 2.164560317993164, "learning_rate": 1.3706265578232134e-05, "loss": 1.0667, "step": 1702 }, { "epoch": 0.11988736360436467, "grad_norm": 2.319335699081421, "learning_rate": 1.370580789380462e-05, "loss": 0.6794, "step": 1703 }, { "epoch": 0.11995776135163674, "grad_norm": 2.3606350421905518, "learning_rate": 1.3705349860736082e-05, "loss": 0.83, "step": 1704 }, { "epoch": 0.12002815909890883, "grad_norm": 2.196242570877075, "learning_rate": 1.3704891479050329e-05, "loss": 0.8052, "step": 1705 }, { "epoch": 0.12009855684618093, "grad_norm": 2.6347365379333496, "learning_rate": 1.37044327487712e-05, "loss": 0.7924, "step": 1706 }, { "epoch": 0.120168954593453, "grad_norm": 2.2286832332611084, "learning_rate": 1.3703973669922538e-05, "loss": 0.8611, "step": 1707 }, { "epoch": 0.1202393523407251, "grad_norm": 2.0836708545684814, "learning_rate": 1.3703514242528211e-05, "loss": 0.6764, "step": 1708 }, { "epoch": 0.12030975008799719, "grad_norm": 2.0547873973846436, "learning_rate": 1.370305446661211e-05, "loss": 0.8295, "step": 1709 }, { "epoch": 0.12038014783526928, "grad_norm": 2.275881767272949, "learning_rate": 1.3702594342198138e-05, "loss": 0.8162, "step": 1710 }, { "epoch": 0.12045054558254135, "grad_norm": 1.9025455713272095, "learning_rate": 1.3702133869310213e-05, "loss": 0.7354, "step": 1711 }, { "epoch": 0.12052094332981345, "grad_norm": 2.205639600753784, "learning_rate": 1.370167304797228e-05, "loss": 0.7218, "step": 1712 }, { "epoch": 0.12059134107708554, "grad_norm": 2.573308229446411, "learning_rate": 1.3701211878208297e-05, "loss": 0.8593, "step": 1713 }, { "epoch": 0.12066173882435761, "grad_norm": 2.389561653137207, "learning_rate": 1.3700750360042236e-05, "loss": 0.7506, "step": 1714 }, { "epoch": 0.1207321365716297, "grad_norm": 1.9726464748382568, "learning_rate": 1.3700288493498097e-05, "loss": 0.7932, "step": 1715 }, { "epoch": 0.1208025343189018, "grad_norm": 2.191450357437134, "learning_rate": 1.3699826278599892e-05, "loss": 0.8635, "step": 1716 }, { "epoch": 0.12087293206617389, "grad_norm": 2.0865025520324707, "learning_rate": 1.3699363715371652e-05, "loss": 0.7982, "step": 1717 }, { "epoch": 0.12094332981344597, "grad_norm": 1.8518706560134888, "learning_rate": 1.3698900803837422e-05, "loss": 0.642, "step": 1718 }, { "epoch": 0.12101372756071806, "grad_norm": 2.526310920715332, "learning_rate": 1.3698437544021273e-05, "loss": 0.8494, "step": 1719 }, { "epoch": 0.12108412530799015, "grad_norm": 2.230039119720459, "learning_rate": 1.3697973935947291e-05, "loss": 0.6433, "step": 1720 }, { "epoch": 0.12115452305526223, "grad_norm": 1.886214256286621, "learning_rate": 1.3697509979639579e-05, "loss": 0.7352, "step": 1721 }, { "epoch": 0.12122492080253432, "grad_norm": 2.7028634548187256, "learning_rate": 1.3697045675122257e-05, "loss": 0.7563, "step": 1722 }, { "epoch": 0.12129531854980641, "grad_norm": 2.1451802253723145, "learning_rate": 1.3696581022419461e-05, "loss": 0.7101, "step": 1723 }, { "epoch": 0.1213657162970785, "grad_norm": 3.1332104206085205, "learning_rate": 1.3696116021555357e-05, "loss": 0.6775, "step": 1724 }, { "epoch": 0.12143611404435058, "grad_norm": 2.4279415607452393, "learning_rate": 1.3695650672554114e-05, "loss": 0.7864, "step": 1725 }, { "epoch": 0.12150651179162267, "grad_norm": 2.1416592597961426, "learning_rate": 1.3695184975439932e-05, "loss": 0.7818, "step": 1726 }, { "epoch": 0.12157690953889476, "grad_norm": 2.28859543800354, "learning_rate": 1.3694718930237018e-05, "loss": 0.8247, "step": 1727 }, { "epoch": 0.12164730728616684, "grad_norm": 2.4603631496429443, "learning_rate": 1.3694252536969605e-05, "loss": 0.7613, "step": 1728 }, { "epoch": 0.12171770503343893, "grad_norm": 2.2283670902252197, "learning_rate": 1.3693785795661935e-05, "loss": 0.7244, "step": 1729 }, { "epoch": 0.12178810278071102, "grad_norm": 3.2336344718933105, "learning_rate": 1.3693318706338283e-05, "loss": 0.9057, "step": 1730 }, { "epoch": 0.12185850052798311, "grad_norm": 1.9941760301589966, "learning_rate": 1.369285126902293e-05, "loss": 0.7498, "step": 1731 }, { "epoch": 0.12192889827525519, "grad_norm": 2.0819497108459473, "learning_rate": 1.3692383483740179e-05, "loss": 0.7401, "step": 1732 }, { "epoch": 0.12199929602252728, "grad_norm": 2.2932868003845215, "learning_rate": 1.3691915350514349e-05, "loss": 0.7393, "step": 1733 }, { "epoch": 0.12206969376979937, "grad_norm": 2.5256993770599365, "learning_rate": 1.3691446869369777e-05, "loss": 0.8674, "step": 1734 }, { "epoch": 0.12214009151707145, "grad_norm": 2.2602732181549072, "learning_rate": 1.3690978040330825e-05, "loss": 0.7654, "step": 1735 }, { "epoch": 0.12221048926434354, "grad_norm": 2.0076351165771484, "learning_rate": 1.3690508863421861e-05, "loss": 0.6942, "step": 1736 }, { "epoch": 0.12228088701161563, "grad_norm": 2.7057385444641113, "learning_rate": 1.3690039338667287e-05, "loss": 0.7423, "step": 1737 }, { "epoch": 0.12235128475888772, "grad_norm": 2.248765230178833, "learning_rate": 1.3689569466091508e-05, "loss": 0.722, "step": 1738 }, { "epoch": 0.1224216825061598, "grad_norm": 2.634721517562866, "learning_rate": 1.368909924571895e-05, "loss": 0.7147, "step": 1739 }, { "epoch": 0.12249208025343189, "grad_norm": 1.789183259010315, "learning_rate": 1.3688628677574067e-05, "loss": 0.6733, "step": 1740 }, { "epoch": 0.12256247800070398, "grad_norm": 2.236478328704834, "learning_rate": 1.3688157761681323e-05, "loss": 0.9276, "step": 1741 }, { "epoch": 0.12263287574797606, "grad_norm": 2.4731874465942383, "learning_rate": 1.3687686498065196e-05, "loss": 0.6293, "step": 1742 }, { "epoch": 0.12270327349524815, "grad_norm": 2.5843875408172607, "learning_rate": 1.3687214886750195e-05, "loss": 0.7737, "step": 1743 }, { "epoch": 0.12277367124252024, "grad_norm": 1.8833657503128052, "learning_rate": 1.3686742927760834e-05, "loss": 0.6721, "step": 1744 }, { "epoch": 0.12284406898979233, "grad_norm": 2.26562237739563, "learning_rate": 1.3686270621121653e-05, "loss": 0.8258, "step": 1745 }, { "epoch": 0.12291446673706441, "grad_norm": 2.5318357944488525, "learning_rate": 1.3685797966857205e-05, "loss": 0.6698, "step": 1746 }, { "epoch": 0.1229848644843365, "grad_norm": 2.6742749214172363, "learning_rate": 1.3685324964992067e-05, "loss": 0.8127, "step": 1747 }, { "epoch": 0.12305526223160859, "grad_norm": 2.2961127758026123, "learning_rate": 1.368485161555083e-05, "loss": 0.795, "step": 1748 }, { "epoch": 0.12312565997888067, "grad_norm": 2.0450546741485596, "learning_rate": 1.3684377918558103e-05, "loss": 0.7856, "step": 1749 }, { "epoch": 0.12319605772615276, "grad_norm": 2.219982147216797, "learning_rate": 1.3683903874038514e-05, "loss": 0.6871, "step": 1750 }, { "epoch": 0.12326645547342485, "grad_norm": 2.5235648155212402, "learning_rate": 1.3683429482016709e-05, "loss": 0.6545, "step": 1751 }, { "epoch": 0.12333685322069694, "grad_norm": 2.68070125579834, "learning_rate": 1.3682954742517354e-05, "loss": 0.5686, "step": 1752 }, { "epoch": 0.12340725096796902, "grad_norm": 2.7134644985198975, "learning_rate": 1.3682479655565126e-05, "loss": 0.9332, "step": 1753 }, { "epoch": 0.12347764871524111, "grad_norm": 2.51389479637146, "learning_rate": 1.368200422118473e-05, "loss": 0.8313, "step": 1754 }, { "epoch": 0.1235480464625132, "grad_norm": 2.3755791187286377, "learning_rate": 1.3681528439400885e-05, "loss": 0.7192, "step": 1755 }, { "epoch": 0.12361844420978528, "grad_norm": 2.498749017715454, "learning_rate": 1.3681052310238325e-05, "loss": 0.729, "step": 1756 }, { "epoch": 0.12368884195705737, "grad_norm": 1.7530375719070435, "learning_rate": 1.3680575833721803e-05, "loss": 0.7343, "step": 1757 }, { "epoch": 0.12375923970432946, "grad_norm": 2.370089530944824, "learning_rate": 1.3680099009876094e-05, "loss": 0.814, "step": 1758 }, { "epoch": 0.12382963745160155, "grad_norm": 2.9836509227752686, "learning_rate": 1.3679621838725988e-05, "loss": 0.7956, "step": 1759 }, { "epoch": 0.12390003519887363, "grad_norm": 3.9581139087677, "learning_rate": 1.3679144320296295e-05, "loss": 0.7087, "step": 1760 }, { "epoch": 0.12397043294614572, "grad_norm": 2.209245204925537, "learning_rate": 1.3678666454611838e-05, "loss": 0.7089, "step": 1761 }, { "epoch": 0.12404083069341781, "grad_norm": 5.0127458572387695, "learning_rate": 1.3678188241697463e-05, "loss": 0.7511, "step": 1762 }, { "epoch": 0.12411122844068989, "grad_norm": 2.542032480239868, "learning_rate": 1.3677709681578033e-05, "loss": 0.7977, "step": 1763 }, { "epoch": 0.12418162618796198, "grad_norm": 2.505225419998169, "learning_rate": 1.3677230774278428e-05, "loss": 0.821, "step": 1764 }, { "epoch": 0.12425202393523407, "grad_norm": 2.4881508350372314, "learning_rate": 1.3676751519823549e-05, "loss": 0.8916, "step": 1765 }, { "epoch": 0.12432242168250617, "grad_norm": 2.3500194549560547, "learning_rate": 1.3676271918238312e-05, "loss": 0.7423, "step": 1766 }, { "epoch": 0.12439281942977824, "grad_norm": 2.2898924350738525, "learning_rate": 1.3675791969547651e-05, "loss": 0.6642, "step": 1767 }, { "epoch": 0.12446321717705033, "grad_norm": 2.7802574634552, "learning_rate": 1.367531167377652e-05, "loss": 0.7727, "step": 1768 }, { "epoch": 0.12453361492432242, "grad_norm": 2.9698970317840576, "learning_rate": 1.3674831030949889e-05, "loss": 0.8203, "step": 1769 }, { "epoch": 0.1246040126715945, "grad_norm": 2.263657331466675, "learning_rate": 1.3674350041092749e-05, "loss": 0.7665, "step": 1770 }, { "epoch": 0.1246744104188666, "grad_norm": 2.315575361251831, "learning_rate": 1.3673868704230103e-05, "loss": 0.7683, "step": 1771 }, { "epoch": 0.12474480816613868, "grad_norm": 2.316565990447998, "learning_rate": 1.3673387020386982e-05, "loss": 0.7948, "step": 1772 }, { "epoch": 0.12481520591341078, "grad_norm": 2.2005763053894043, "learning_rate": 1.3672904989588425e-05, "loss": 0.8106, "step": 1773 }, { "epoch": 0.12488560366068285, "grad_norm": 2.4389989376068115, "learning_rate": 1.3672422611859493e-05, "loss": 0.7609, "step": 1774 }, { "epoch": 0.12495600140795494, "grad_norm": 2.227780818939209, "learning_rate": 1.3671939887225267e-05, "loss": 0.7493, "step": 1775 }, { "epoch": 0.12502639915522704, "grad_norm": 2.284590244293213, "learning_rate": 1.3671456815710845e-05, "loss": 0.7383, "step": 1776 }, { "epoch": 0.1250967969024991, "grad_norm": 2.3576862812042236, "learning_rate": 1.3670973397341339e-05, "loss": 0.8226, "step": 1777 }, { "epoch": 0.12516719464977122, "grad_norm": 2.285094738006592, "learning_rate": 1.3670489632141887e-05, "loss": 1.0223, "step": 1778 }, { "epoch": 0.1252375923970433, "grad_norm": 2.1608047485351562, "learning_rate": 1.3670005520137637e-05, "loss": 0.7904, "step": 1779 }, { "epoch": 0.12530799014431537, "grad_norm": 3.7611827850341797, "learning_rate": 1.3669521061353758e-05, "loss": 0.8047, "step": 1780 }, { "epoch": 0.12537838789158748, "grad_norm": 2.2425339221954346, "learning_rate": 1.366903625581544e-05, "loss": 0.7349, "step": 1781 }, { "epoch": 0.12544878563885956, "grad_norm": 2.3334922790527344, "learning_rate": 1.3668551103547886e-05, "loss": 0.8375, "step": 1782 }, { "epoch": 0.12551918338613163, "grad_norm": 2.2295351028442383, "learning_rate": 1.3668065604576322e-05, "loss": 0.7182, "step": 1783 }, { "epoch": 0.12558958113340374, "grad_norm": 2.0622599124908447, "learning_rate": 1.3667579758925988e-05, "loss": 0.6562, "step": 1784 }, { "epoch": 0.12565997888067582, "grad_norm": 2.1802384853363037, "learning_rate": 1.3667093566622144e-05, "loss": 0.6353, "step": 1785 }, { "epoch": 0.1257303766279479, "grad_norm": 2.2342283725738525, "learning_rate": 1.3666607027690067e-05, "loss": 0.8221, "step": 1786 }, { "epoch": 0.12580077437522, "grad_norm": 3.9241135120391846, "learning_rate": 1.3666120142155052e-05, "loss": 0.6842, "step": 1787 }, { "epoch": 0.12587117212249208, "grad_norm": 3.1805524826049805, "learning_rate": 1.3665632910042416e-05, "loss": 0.7361, "step": 1788 }, { "epoch": 0.12594156986976418, "grad_norm": 2.025632858276367, "learning_rate": 1.3665145331377485e-05, "loss": 0.8175, "step": 1789 }, { "epoch": 0.12601196761703626, "grad_norm": 2.3465023040771484, "learning_rate": 1.3664657406185616e-05, "loss": 0.8467, "step": 1790 }, { "epoch": 0.12608236536430834, "grad_norm": 2.5781002044677734, "learning_rate": 1.366416913449217e-05, "loss": 0.7824, "step": 1791 }, { "epoch": 0.12615276311158044, "grad_norm": 2.270998477935791, "learning_rate": 1.3663680516322536e-05, "loss": 0.7427, "step": 1792 }, { "epoch": 0.12622316085885252, "grad_norm": 2.3521499633789062, "learning_rate": 1.3663191551702117e-05, "loss": 0.7755, "step": 1793 }, { "epoch": 0.1262935586061246, "grad_norm": 2.192549228668213, "learning_rate": 1.3662702240656335e-05, "loss": 0.8267, "step": 1794 }, { "epoch": 0.1263639563533967, "grad_norm": 3.4676694869995117, "learning_rate": 1.3662212583210626e-05, "loss": 0.8215, "step": 1795 }, { "epoch": 0.12643435410066878, "grad_norm": 1.8905069828033447, "learning_rate": 1.3661722579390457e-05, "loss": 0.705, "step": 1796 }, { "epoch": 0.12650475184794086, "grad_norm": 1.945239782333374, "learning_rate": 1.3661232229221293e-05, "loss": 0.8094, "step": 1797 }, { "epoch": 0.12657514959521296, "grad_norm": 1.8961021900177002, "learning_rate": 1.3660741532728634e-05, "loss": 0.7127, "step": 1798 }, { "epoch": 0.12664554734248504, "grad_norm": 2.2346982955932617, "learning_rate": 1.3660250489937992e-05, "loss": 0.7724, "step": 1799 }, { "epoch": 0.12671594508975711, "grad_norm": 2.88657283782959, "learning_rate": 1.3659759100874894e-05, "loss": 0.7675, "step": 1800 }, { "epoch": 0.12678634283702922, "grad_norm": 2.903137445449829, "learning_rate": 1.365926736556489e-05, "loss": 0.7079, "step": 1801 }, { "epoch": 0.1268567405843013, "grad_norm": 2.1306254863739014, "learning_rate": 1.3658775284033543e-05, "loss": 0.8583, "step": 1802 }, { "epoch": 0.1269271383315734, "grad_norm": 2.0538322925567627, "learning_rate": 1.3658282856306439e-05, "loss": 0.7504, "step": 1803 }, { "epoch": 0.12699753607884548, "grad_norm": 2.036475896835327, "learning_rate": 1.3657790082409178e-05, "loss": 0.7124, "step": 1804 }, { "epoch": 0.12706793382611756, "grad_norm": 2.062352418899536, "learning_rate": 1.3657296962367384e-05, "loss": 0.8698, "step": 1805 }, { "epoch": 0.12713833157338966, "grad_norm": 2.619528293609619, "learning_rate": 1.3656803496206689e-05, "loss": 0.6982, "step": 1806 }, { "epoch": 0.12720872932066174, "grad_norm": 2.4403038024902344, "learning_rate": 1.3656309683952753e-05, "loss": 0.7763, "step": 1807 }, { "epoch": 0.12727912706793382, "grad_norm": 2.480867624282837, "learning_rate": 1.3655815525631247e-05, "loss": 0.7699, "step": 1808 }, { "epoch": 0.12734952481520592, "grad_norm": 2.3262274265289307, "learning_rate": 1.3655321021267866e-05, "loss": 0.6855, "step": 1809 }, { "epoch": 0.127419922562478, "grad_norm": 2.6729824542999268, "learning_rate": 1.3654826170888316e-05, "loss": 0.8057, "step": 1810 }, { "epoch": 0.12749032030975008, "grad_norm": 2.2517542839050293, "learning_rate": 1.3654330974518325e-05, "loss": 0.7157, "step": 1811 }, { "epoch": 0.12756071805702218, "grad_norm": 2.434354543685913, "learning_rate": 1.3653835432183644e-05, "loss": 0.7105, "step": 1812 }, { "epoch": 0.12763111580429426, "grad_norm": 2.0128586292266846, "learning_rate": 1.3653339543910029e-05, "loss": 0.6885, "step": 1813 }, { "epoch": 0.12770151355156634, "grad_norm": 2.305865526199341, "learning_rate": 1.3652843309723267e-05, "loss": 0.616, "step": 1814 }, { "epoch": 0.12777191129883844, "grad_norm": 2.304600477218628, "learning_rate": 1.3652346729649157e-05, "loss": 0.7827, "step": 1815 }, { "epoch": 0.12784230904611052, "grad_norm": 2.375638961791992, "learning_rate": 1.3651849803713515e-05, "loss": 0.8232, "step": 1816 }, { "epoch": 0.12791270679338262, "grad_norm": 2.7710015773773193, "learning_rate": 1.3651352531942176e-05, "loss": 0.7432, "step": 1817 }, { "epoch": 0.1279831045406547, "grad_norm": 2.4607651233673096, "learning_rate": 1.3650854914360997e-05, "loss": 0.7505, "step": 1818 }, { "epoch": 0.12805350228792678, "grad_norm": 2.492663860321045, "learning_rate": 1.3650356950995847e-05, "loss": 0.8469, "step": 1819 }, { "epoch": 0.12812390003519888, "grad_norm": 2.140880584716797, "learning_rate": 1.3649858641872617e-05, "loss": 0.8327, "step": 1820 }, { "epoch": 0.12819429778247096, "grad_norm": 2.491886854171753, "learning_rate": 1.3649359987017213e-05, "loss": 0.7072, "step": 1821 }, { "epoch": 0.12826469552974304, "grad_norm": 2.2673513889312744, "learning_rate": 1.3648860986455561e-05, "loss": 0.7884, "step": 1822 }, { "epoch": 0.12833509327701514, "grad_norm": 2.0024282932281494, "learning_rate": 1.3648361640213607e-05, "loss": 0.748, "step": 1823 }, { "epoch": 0.12840549102428722, "grad_norm": 2.7802116870880127, "learning_rate": 1.3647861948317307e-05, "loss": 0.8078, "step": 1824 }, { "epoch": 0.1284758887715593, "grad_norm": 2.1268422603607178, "learning_rate": 1.3647361910792647e-05, "loss": 0.8124, "step": 1825 }, { "epoch": 0.1285462865188314, "grad_norm": 2.3044674396514893, "learning_rate": 1.3646861527665617e-05, "loss": 0.761, "step": 1826 }, { "epoch": 0.12861668426610348, "grad_norm": 2.2514801025390625, "learning_rate": 1.364636079896224e-05, "loss": 0.8815, "step": 1827 }, { "epoch": 0.12868708201337556, "grad_norm": 2.3364713191986084, "learning_rate": 1.3645859724708542e-05, "loss": 0.889, "step": 1828 }, { "epoch": 0.12875747976064766, "grad_norm": 2.194827079772949, "learning_rate": 1.364535830493058e-05, "loss": 0.7427, "step": 1829 }, { "epoch": 0.12882787750791974, "grad_norm": 2.7342145442962646, "learning_rate": 1.3644856539654423e-05, "loss": 0.861, "step": 1830 }, { "epoch": 0.12889827525519185, "grad_norm": 2.5517847537994385, "learning_rate": 1.3644354428906155e-05, "loss": 0.7664, "step": 1831 }, { "epoch": 0.12896867300246392, "grad_norm": 2.11301589012146, "learning_rate": 1.3643851972711884e-05, "loss": 0.7276, "step": 1832 }, { "epoch": 0.129039070749736, "grad_norm": 2.1110448837280273, "learning_rate": 1.364334917109773e-05, "loss": 0.6611, "step": 1833 }, { "epoch": 0.1291094684970081, "grad_norm": 1.6364364624023438, "learning_rate": 1.3642846024089837e-05, "loss": 0.8619, "step": 1834 }, { "epoch": 0.12917986624428018, "grad_norm": 2.0771231651306152, "learning_rate": 1.3642342531714361e-05, "loss": 0.7425, "step": 1835 }, { "epoch": 0.12925026399155226, "grad_norm": 2.2103233337402344, "learning_rate": 1.3641838693997485e-05, "loss": 0.7834, "step": 1836 }, { "epoch": 0.12932066173882437, "grad_norm": 2.305102825164795, "learning_rate": 1.36413345109654e-05, "loss": 0.8091, "step": 1837 }, { "epoch": 0.12939105948609644, "grad_norm": 2.3371663093566895, "learning_rate": 1.3640829982644316e-05, "loss": 0.657, "step": 1838 }, { "epoch": 0.12946145723336852, "grad_norm": 2.1166188716888428, "learning_rate": 1.3640325109060466e-05, "loss": 0.7913, "step": 1839 }, { "epoch": 0.12953185498064063, "grad_norm": 2.5462310314178467, "learning_rate": 1.3639819890240103e-05, "loss": 0.7464, "step": 1840 }, { "epoch": 0.1296022527279127, "grad_norm": 2.9634149074554443, "learning_rate": 1.363931432620949e-05, "loss": 0.7995, "step": 1841 }, { "epoch": 0.12967265047518478, "grad_norm": 3.758740186691284, "learning_rate": 1.363880841699491e-05, "loss": 0.7269, "step": 1842 }, { "epoch": 0.12974304822245689, "grad_norm": 2.2347915172576904, "learning_rate": 1.363830216262267e-05, "loss": 0.6948, "step": 1843 }, { "epoch": 0.12981344596972896, "grad_norm": 2.3621180057525635, "learning_rate": 1.3637795563119089e-05, "loss": 0.8325, "step": 1844 }, { "epoch": 0.12988384371700107, "grad_norm": 2.2070670127868652, "learning_rate": 1.3637288618510503e-05, "loss": 0.6647, "step": 1845 }, { "epoch": 0.12995424146427315, "grad_norm": 2.2238829135894775, "learning_rate": 1.3636781328823273e-05, "loss": 0.6848, "step": 1846 }, { "epoch": 0.13002463921154522, "grad_norm": 3.487086772918701, "learning_rate": 1.363627369408377e-05, "loss": 0.9438, "step": 1847 }, { "epoch": 0.13009503695881733, "grad_norm": 2.861480236053467, "learning_rate": 1.3635765714318387e-05, "loss": 0.8817, "step": 1848 }, { "epoch": 0.1301654347060894, "grad_norm": 2.2094192504882812, "learning_rate": 1.3635257389553536e-05, "loss": 0.7206, "step": 1849 }, { "epoch": 0.13023583245336148, "grad_norm": 2.120537042617798, "learning_rate": 1.3634748719815644e-05, "loss": 0.8532, "step": 1850 }, { "epoch": 0.1303062302006336, "grad_norm": 2.262815237045288, "learning_rate": 1.3634239705131155e-05, "loss": 0.8488, "step": 1851 }, { "epoch": 0.13037662794790567, "grad_norm": 2.104053258895874, "learning_rate": 1.3633730345526538e-05, "loss": 0.8025, "step": 1852 }, { "epoch": 0.13044702569517774, "grad_norm": 2.100344181060791, "learning_rate": 1.3633220641028271e-05, "loss": 0.6646, "step": 1853 }, { "epoch": 0.13051742344244985, "grad_norm": 3.003621816635132, "learning_rate": 1.3632710591662857e-05, "loss": 0.8229, "step": 1854 }, { "epoch": 0.13058782118972193, "grad_norm": 2.574700117111206, "learning_rate": 1.3632200197456812e-05, "loss": 0.7211, "step": 1855 }, { "epoch": 0.130658218936994, "grad_norm": 2.1832973957061768, "learning_rate": 1.363168945843667e-05, "loss": 0.7285, "step": 1856 }, { "epoch": 0.1307286166842661, "grad_norm": 2.831162214279175, "learning_rate": 1.363117837462899e-05, "loss": 0.774, "step": 1857 }, { "epoch": 0.13079901443153819, "grad_norm": 2.2083566188812256, "learning_rate": 1.363066694606034e-05, "loss": 0.82, "step": 1858 }, { "epoch": 0.1308694121788103, "grad_norm": 2.231173038482666, "learning_rate": 1.363015517275731e-05, "loss": 0.7601, "step": 1859 }, { "epoch": 0.13093980992608237, "grad_norm": 2.01057767868042, "learning_rate": 1.3629643054746508e-05, "loss": 0.7534, "step": 1860 }, { "epoch": 0.13101020767335445, "grad_norm": 2.2020952701568604, "learning_rate": 1.3629130592054558e-05, "loss": 0.8813, "step": 1861 }, { "epoch": 0.13108060542062655, "grad_norm": 2.7038967609405518, "learning_rate": 1.3628617784708106e-05, "loss": 0.7683, "step": 1862 }, { "epoch": 0.13115100316789863, "grad_norm": 2.4637300968170166, "learning_rate": 1.3628104632733813e-05, "loss": 0.8272, "step": 1863 }, { "epoch": 0.1312214009151707, "grad_norm": 3.190948486328125, "learning_rate": 1.3627591136158356e-05, "loss": 0.8552, "step": 1864 }, { "epoch": 0.1312917986624428, "grad_norm": 2.393796443939209, "learning_rate": 1.3627077295008435e-05, "loss": 0.7554, "step": 1865 }, { "epoch": 0.1313621964097149, "grad_norm": 3.1703975200653076, "learning_rate": 1.362656310931076e-05, "loss": 0.7323, "step": 1866 }, { "epoch": 0.13143259415698696, "grad_norm": 2.4044559001922607, "learning_rate": 1.3626048579092071e-05, "loss": 0.8052, "step": 1867 }, { "epoch": 0.13150299190425907, "grad_norm": 2.2382147312164307, "learning_rate": 1.3625533704379115e-05, "loss": 0.8178, "step": 1868 }, { "epoch": 0.13157338965153115, "grad_norm": 3.024627447128296, "learning_rate": 1.3625018485198661e-05, "loss": 0.7555, "step": 1869 }, { "epoch": 0.13164378739880322, "grad_norm": 2.018017292022705, "learning_rate": 1.3624502921577497e-05, "loss": 0.7941, "step": 1870 }, { "epoch": 0.13171418514607533, "grad_norm": 2.8861565589904785, "learning_rate": 1.3623987013542425e-05, "loss": 0.7562, "step": 1871 }, { "epoch": 0.1317845828933474, "grad_norm": 2.131601333618164, "learning_rate": 1.362347076112027e-05, "loss": 0.7117, "step": 1872 }, { "epoch": 0.1318549806406195, "grad_norm": 2.5671279430389404, "learning_rate": 1.362295416433787e-05, "loss": 0.7809, "step": 1873 }, { "epoch": 0.1319253783878916, "grad_norm": 2.495081663131714, "learning_rate": 1.3622437223222087e-05, "loss": 0.7405, "step": 1874 }, { "epoch": 0.13199577613516367, "grad_norm": 2.019268751144409, "learning_rate": 1.3621919937799795e-05, "loss": 0.8142, "step": 1875 }, { "epoch": 0.13206617388243577, "grad_norm": 2.019686460494995, "learning_rate": 1.3621402308097887e-05, "loss": 0.8295, "step": 1876 }, { "epoch": 0.13213657162970785, "grad_norm": 2.1739399433135986, "learning_rate": 1.3620884334143278e-05, "loss": 0.8134, "step": 1877 }, { "epoch": 0.13220696937697993, "grad_norm": 2.3830435276031494, "learning_rate": 1.3620366015962896e-05, "loss": 0.7498, "step": 1878 }, { "epoch": 0.13227736712425203, "grad_norm": 2.5044844150543213, "learning_rate": 1.3619847353583687e-05, "loss": 0.7337, "step": 1879 }, { "epoch": 0.1323477648715241, "grad_norm": 2.208521604537964, "learning_rate": 1.3619328347032619e-05, "loss": 0.7802, "step": 1880 }, { "epoch": 0.1324181626187962, "grad_norm": 2.6792333126068115, "learning_rate": 1.3618808996336676e-05, "loss": 0.8057, "step": 1881 }, { "epoch": 0.1324885603660683, "grad_norm": 1.9486831426620483, "learning_rate": 1.361828930152286e-05, "loss": 0.7195, "step": 1882 }, { "epoch": 0.13255895811334037, "grad_norm": 2.2408807277679443, "learning_rate": 1.3617769262618188e-05, "loss": 0.6922, "step": 1883 }, { "epoch": 0.13262935586061245, "grad_norm": 2.4251515865325928, "learning_rate": 1.3617248879649701e-05, "loss": 0.8559, "step": 1884 }, { "epoch": 0.13269975360788455, "grad_norm": 2.0979878902435303, "learning_rate": 1.361672815264445e-05, "loss": 0.6283, "step": 1885 }, { "epoch": 0.13277015135515663, "grad_norm": 2.894254207611084, "learning_rate": 1.361620708162951e-05, "loss": 0.8755, "step": 1886 }, { "epoch": 0.13284054910242873, "grad_norm": 2.005594491958618, "learning_rate": 1.3615685666631972e-05, "loss": 0.7578, "step": 1887 }, { "epoch": 0.1329109468497008, "grad_norm": 2.0733625888824463, "learning_rate": 1.3615163907678944e-05, "loss": 0.7767, "step": 1888 }, { "epoch": 0.1329813445969729, "grad_norm": 2.7533702850341797, "learning_rate": 1.3614641804797554e-05, "loss": 0.8469, "step": 1889 }, { "epoch": 0.133051742344245, "grad_norm": 2.415290355682373, "learning_rate": 1.3614119358014946e-05, "loss": 0.761, "step": 1890 }, { "epoch": 0.13312214009151707, "grad_norm": 2.079442262649536, "learning_rate": 1.3613596567358283e-05, "loss": 0.7533, "step": 1891 }, { "epoch": 0.13319253783878915, "grad_norm": 2.0873489379882812, "learning_rate": 1.3613073432854744e-05, "loss": 0.8722, "step": 1892 }, { "epoch": 0.13326293558606125, "grad_norm": 2.010880708694458, "learning_rate": 1.3612549954531528e-05, "loss": 0.7316, "step": 1893 }, { "epoch": 0.13333333333333333, "grad_norm": 2.19970965385437, "learning_rate": 1.3612026132415851e-05, "loss": 0.7758, "step": 1894 }, { "epoch": 0.1334037310806054, "grad_norm": 2.8357908725738525, "learning_rate": 1.3611501966534948e-05, "loss": 0.7816, "step": 1895 }, { "epoch": 0.13347412882787751, "grad_norm": 2.0144994258880615, "learning_rate": 1.361097745691607e-05, "loss": 0.721, "step": 1896 }, { "epoch": 0.1335445265751496, "grad_norm": 2.2422001361846924, "learning_rate": 1.3610452603586486e-05, "loss": 0.8258, "step": 1897 }, { "epoch": 0.13361492432242167, "grad_norm": 1.9337773323059082, "learning_rate": 1.3609927406573484e-05, "loss": 0.9022, "step": 1898 }, { "epoch": 0.13368532206969377, "grad_norm": 2.263606548309326, "learning_rate": 1.3609401865904372e-05, "loss": 0.785, "step": 1899 }, { "epoch": 0.13375571981696585, "grad_norm": 2.3631277084350586, "learning_rate": 1.360887598160647e-05, "loss": 0.6474, "step": 1900 }, { "epoch": 0.13382611756423796, "grad_norm": 2.476577043533325, "learning_rate": 1.360834975370712e-05, "loss": 0.7258, "step": 1901 }, { "epoch": 0.13389651531151003, "grad_norm": 2.17185378074646, "learning_rate": 1.3607823182233682e-05, "loss": 0.7332, "step": 1902 }, { "epoch": 0.1339669130587821, "grad_norm": 2.0851991176605225, "learning_rate": 1.3607296267213531e-05, "loss": 0.746, "step": 1903 }, { "epoch": 0.13403731080605422, "grad_norm": 2.326796770095825, "learning_rate": 1.3606769008674066e-05, "loss": 0.7286, "step": 1904 }, { "epoch": 0.1341077085533263, "grad_norm": 2.218841314315796, "learning_rate": 1.3606241406642692e-05, "loss": 0.8739, "step": 1905 }, { "epoch": 0.13417810630059837, "grad_norm": 2.5065817832946777, "learning_rate": 1.3605713461146849e-05, "loss": 0.8621, "step": 1906 }, { "epoch": 0.13424850404787048, "grad_norm": 2.5060646533966064, "learning_rate": 1.3605185172213977e-05, "loss": 0.7049, "step": 1907 }, { "epoch": 0.13431890179514255, "grad_norm": 2.225355863571167, "learning_rate": 1.360465653987155e-05, "loss": 0.7553, "step": 1908 }, { "epoch": 0.13438929954241463, "grad_norm": 2.3275983333587646, "learning_rate": 1.3604127564147044e-05, "loss": 0.7382, "step": 1909 }, { "epoch": 0.13445969728968674, "grad_norm": 2.308629035949707, "learning_rate": 1.3603598245067968e-05, "loss": 0.7007, "step": 1910 }, { "epoch": 0.1345300950369588, "grad_norm": 2.909109592437744, "learning_rate": 1.3603068582661838e-05, "loss": 0.7541, "step": 1911 }, { "epoch": 0.13460049278423092, "grad_norm": 2.424083948135376, "learning_rate": 1.360253857695619e-05, "loss": 0.8619, "step": 1912 }, { "epoch": 0.134670890531503, "grad_norm": 2.4796924591064453, "learning_rate": 1.3602008227978584e-05, "loss": 0.758, "step": 1913 }, { "epoch": 0.13474128827877507, "grad_norm": 2.1611218452453613, "learning_rate": 1.3601477535756591e-05, "loss": 0.7379, "step": 1914 }, { "epoch": 0.13481168602604718, "grad_norm": 2.286999225616455, "learning_rate": 1.3600946500317803e-05, "loss": 0.7163, "step": 1915 }, { "epoch": 0.13488208377331926, "grad_norm": 2.0519180297851562, "learning_rate": 1.3600415121689826e-05, "loss": 0.8041, "step": 1916 }, { "epoch": 0.13495248152059133, "grad_norm": 2.749436855316162, "learning_rate": 1.3599883399900289e-05, "loss": 0.8025, "step": 1917 }, { "epoch": 0.13502287926786344, "grad_norm": 2.311777353286743, "learning_rate": 1.359935133497684e-05, "loss": 0.8143, "step": 1918 }, { "epoch": 0.13509327701513552, "grad_norm": 2.494694948196411, "learning_rate": 1.3598818926947135e-05, "loss": 0.6401, "step": 1919 }, { "epoch": 0.1351636747624076, "grad_norm": 3.227447986602783, "learning_rate": 1.3598286175838858e-05, "loss": 0.7004, "step": 1920 }, { "epoch": 0.1352340725096797, "grad_norm": 2.2471258640289307, "learning_rate": 1.359775308167971e-05, "loss": 0.7483, "step": 1921 }, { "epoch": 0.13530447025695178, "grad_norm": 1.8263351917266846, "learning_rate": 1.3597219644497401e-05, "loss": 0.7589, "step": 1922 }, { "epoch": 0.13537486800422385, "grad_norm": 2.466553211212158, "learning_rate": 1.3596685864319669e-05, "loss": 0.855, "step": 1923 }, { "epoch": 0.13544526575149596, "grad_norm": 1.9169102907180786, "learning_rate": 1.3596151741174264e-05, "loss": 0.8291, "step": 1924 }, { "epoch": 0.13551566349876804, "grad_norm": 2.401426076889038, "learning_rate": 1.3595617275088956e-05, "loss": 0.7603, "step": 1925 }, { "epoch": 0.13558606124604014, "grad_norm": 2.499768018722534, "learning_rate": 1.3595082466091533e-05, "loss": 0.8387, "step": 1926 }, { "epoch": 0.13565645899331222, "grad_norm": 2.067671537399292, "learning_rate": 1.3594547314209801e-05, "loss": 0.7345, "step": 1927 }, { "epoch": 0.1357268567405843, "grad_norm": 2.19545316696167, "learning_rate": 1.359401181947158e-05, "loss": 0.8727, "step": 1928 }, { "epoch": 0.1357972544878564, "grad_norm": 1.876368761062622, "learning_rate": 1.3593475981904714e-05, "loss": 0.6973, "step": 1929 }, { "epoch": 0.13586765223512848, "grad_norm": 2.378824472427368, "learning_rate": 1.3592939801537061e-05, "loss": 0.8822, "step": 1930 }, { "epoch": 0.13593804998240056, "grad_norm": 3.0910491943359375, "learning_rate": 1.3592403278396494e-05, "loss": 0.6914, "step": 1931 }, { "epoch": 0.13600844772967266, "grad_norm": 2.362002372741699, "learning_rate": 1.3591866412510913e-05, "loss": 0.7972, "step": 1932 }, { "epoch": 0.13607884547694474, "grad_norm": 2.2485787868499756, "learning_rate": 1.3591329203908224e-05, "loss": 0.8505, "step": 1933 }, { "epoch": 0.13614924322421681, "grad_norm": 2.3771824836730957, "learning_rate": 1.3590791652616362e-05, "loss": 0.8222, "step": 1934 }, { "epoch": 0.13621964097148892, "grad_norm": 2.6254820823669434, "learning_rate": 1.3590253758663273e-05, "loss": 0.7714, "step": 1935 }, { "epoch": 0.136290038718761, "grad_norm": 2.1136019229888916, "learning_rate": 1.3589715522076925e-05, "loss": 0.7104, "step": 1936 }, { "epoch": 0.13636043646603307, "grad_norm": 2.489468812942505, "learning_rate": 1.3589176942885294e-05, "loss": 0.7752, "step": 1937 }, { "epoch": 0.13643083421330518, "grad_norm": 2.2035090923309326, "learning_rate": 1.3588638021116389e-05, "loss": 0.8186, "step": 1938 }, { "epoch": 0.13650123196057726, "grad_norm": 1.7516924142837524, "learning_rate": 1.3588098756798225e-05, "loss": 0.6657, "step": 1939 }, { "epoch": 0.13657162970784936, "grad_norm": 2.3919460773468018, "learning_rate": 1.3587559149958843e-05, "loss": 0.7708, "step": 1940 }, { "epoch": 0.13664202745512144, "grad_norm": 2.4444799423217773, "learning_rate": 1.3587019200626292e-05, "loss": 0.8476, "step": 1941 }, { "epoch": 0.13671242520239352, "grad_norm": 2.5161588191986084, "learning_rate": 1.3586478908828649e-05, "loss": 0.6873, "step": 1942 }, { "epoch": 0.13678282294966562, "grad_norm": 2.228607654571533, "learning_rate": 1.3585938274594003e-05, "loss": 0.7474, "step": 1943 }, { "epoch": 0.1368532206969377, "grad_norm": 2.4045674800872803, "learning_rate": 1.358539729795046e-05, "loss": 0.7818, "step": 1944 }, { "epoch": 0.13692361844420978, "grad_norm": 1.9466960430145264, "learning_rate": 1.3584855978926146e-05, "loss": 0.7054, "step": 1945 }, { "epoch": 0.13699401619148188, "grad_norm": 2.2940680980682373, "learning_rate": 1.3584314317549208e-05, "loss": 0.7498, "step": 1946 }, { "epoch": 0.13706441393875396, "grad_norm": 2.4398934841156006, "learning_rate": 1.3583772313847805e-05, "loss": 0.7194, "step": 1947 }, { "epoch": 0.13713481168602604, "grad_norm": 1.9971987009048462, "learning_rate": 1.3583229967850117e-05, "loss": 0.8609, "step": 1948 }, { "epoch": 0.13720520943329814, "grad_norm": 2.4983556270599365, "learning_rate": 1.3582687279584341e-05, "loss": 0.8667, "step": 1949 }, { "epoch": 0.13727560718057022, "grad_norm": 1.9318809509277344, "learning_rate": 1.3582144249078692e-05, "loss": 0.7655, "step": 1950 }, { "epoch": 0.1373460049278423, "grad_norm": 1.9988089799880981, "learning_rate": 1.3581600876361401e-05, "loss": 0.808, "step": 1951 }, { "epoch": 0.1374164026751144, "grad_norm": 2.1849348545074463, "learning_rate": 1.3581057161460723e-05, "loss": 0.7175, "step": 1952 }, { "epoch": 0.13748680042238648, "grad_norm": 2.3738396167755127, "learning_rate": 1.3580513104404919e-05, "loss": 0.7448, "step": 1953 }, { "epoch": 0.13755719816965858, "grad_norm": 2.326361894607544, "learning_rate": 1.357996870522228e-05, "loss": 0.6477, "step": 1954 }, { "epoch": 0.13762759591693066, "grad_norm": 1.6295857429504395, "learning_rate": 1.3579423963941111e-05, "loss": 0.7371, "step": 1955 }, { "epoch": 0.13769799366420274, "grad_norm": 2.1350173950195312, "learning_rate": 1.3578878880589729e-05, "loss": 0.8107, "step": 1956 }, { "epoch": 0.13776839141147484, "grad_norm": 2.0894556045532227, "learning_rate": 1.3578333455196477e-05, "loss": 0.7772, "step": 1957 }, { "epoch": 0.13783878915874692, "grad_norm": 2.438178300857544, "learning_rate": 1.3577787687789711e-05, "loss": 0.7477, "step": 1958 }, { "epoch": 0.137909186906019, "grad_norm": 2.361396312713623, "learning_rate": 1.3577241578397805e-05, "loss": 0.7835, "step": 1959 }, { "epoch": 0.1379795846532911, "grad_norm": 2.260019063949585, "learning_rate": 1.3576695127049153e-05, "loss": 0.743, "step": 1960 }, { "epoch": 0.13804998240056318, "grad_norm": 1.9856444597244263, "learning_rate": 1.3576148333772167e-05, "loss": 0.7094, "step": 1961 }, { "epoch": 0.13812038014783526, "grad_norm": 2.427976131439209, "learning_rate": 1.357560119859527e-05, "loss": 0.68, "step": 1962 }, { "epoch": 0.13819077789510736, "grad_norm": 2.2321524620056152, "learning_rate": 1.3575053721546913e-05, "loss": 0.7702, "step": 1963 }, { "epoch": 0.13826117564237944, "grad_norm": 1.902010440826416, "learning_rate": 1.357450590265556e-05, "loss": 0.8051, "step": 1964 }, { "epoch": 0.13833157338965152, "grad_norm": 2.2959933280944824, "learning_rate": 1.3573957741949689e-05, "loss": 0.8841, "step": 1965 }, { "epoch": 0.13840197113692362, "grad_norm": 2.3663811683654785, "learning_rate": 1.35734092394578e-05, "loss": 0.758, "step": 1966 }, { "epoch": 0.1384723688841957, "grad_norm": 2.561871290206909, "learning_rate": 1.3572860395208415e-05, "loss": 0.7589, "step": 1967 }, { "epoch": 0.1385427666314678, "grad_norm": 2.1267249584198, "learning_rate": 1.3572311209230063e-05, "loss": 0.6337, "step": 1968 }, { "epoch": 0.13861316437873988, "grad_norm": 2.2744343280792236, "learning_rate": 1.35717616815513e-05, "loss": 0.7442, "step": 1969 }, { "epoch": 0.13868356212601196, "grad_norm": 2.9091222286224365, "learning_rate": 1.3571211812200694e-05, "loss": 0.8805, "step": 1970 }, { "epoch": 0.13875395987328407, "grad_norm": 5.055299282073975, "learning_rate": 1.3570661601206836e-05, "loss": 0.8747, "step": 1971 }, { "epoch": 0.13882435762055614, "grad_norm": 2.4621737003326416, "learning_rate": 1.3570111048598332e-05, "loss": 0.6658, "step": 1972 }, { "epoch": 0.13889475536782822, "grad_norm": 2.569411039352417, "learning_rate": 1.3569560154403802e-05, "loss": 0.8446, "step": 1973 }, { "epoch": 0.13896515311510033, "grad_norm": 2.068253517150879, "learning_rate": 1.356900891865189e-05, "loss": 0.8468, "step": 1974 }, { "epoch": 0.1390355508623724, "grad_norm": 2.315368175506592, "learning_rate": 1.3568457341371255e-05, "loss": 0.7799, "step": 1975 }, { "epoch": 0.13910594860964448, "grad_norm": 2.039508819580078, "learning_rate": 1.3567905422590573e-05, "loss": 0.8208, "step": 1976 }, { "epoch": 0.13917634635691659, "grad_norm": 2.2159430980682373, "learning_rate": 1.3567353162338543e-05, "loss": 0.7069, "step": 1977 }, { "epoch": 0.13924674410418866, "grad_norm": 2.021608591079712, "learning_rate": 1.3566800560643873e-05, "loss": 0.8039, "step": 1978 }, { "epoch": 0.13931714185146074, "grad_norm": 2.191493034362793, "learning_rate": 1.3566247617535295e-05, "loss": 0.7746, "step": 1979 }, { "epoch": 0.13938753959873285, "grad_norm": 2.211601734161377, "learning_rate": 1.3565694333041556e-05, "loss": 0.7813, "step": 1980 }, { "epoch": 0.13945793734600492, "grad_norm": 2.5179941654205322, "learning_rate": 1.3565140707191422e-05, "loss": 0.8092, "step": 1981 }, { "epoch": 0.13952833509327703, "grad_norm": 1.8915427923202515, "learning_rate": 1.3564586740013677e-05, "loss": 0.559, "step": 1982 }, { "epoch": 0.1395987328405491, "grad_norm": 2.2617223262786865, "learning_rate": 1.356403243153712e-05, "loss": 0.7995, "step": 1983 }, { "epoch": 0.13966913058782118, "grad_norm": 2.1160953044891357, "learning_rate": 1.3563477781790575e-05, "loss": 0.7255, "step": 1984 }, { "epoch": 0.1397395283350933, "grad_norm": 2.628541946411133, "learning_rate": 1.3562922790802877e-05, "loss": 0.7983, "step": 1985 }, { "epoch": 0.13980992608236537, "grad_norm": 1.8303766250610352, "learning_rate": 1.3562367458602878e-05, "loss": 0.621, "step": 1986 }, { "epoch": 0.13988032382963744, "grad_norm": 2.4398915767669678, "learning_rate": 1.356181178521945e-05, "loss": 0.7953, "step": 1987 }, { "epoch": 0.13995072157690955, "grad_norm": 2.1239535808563232, "learning_rate": 1.3561255770681487e-05, "loss": 0.8115, "step": 1988 }, { "epoch": 0.14002111932418163, "grad_norm": 2.0967984199523926, "learning_rate": 1.3560699415017893e-05, "loss": 0.7181, "step": 1989 }, { "epoch": 0.1400915170714537, "grad_norm": 2.217789888381958, "learning_rate": 1.3560142718257594e-05, "loss": 0.7811, "step": 1990 }, { "epoch": 0.1401619148187258, "grad_norm": 1.9287405014038086, "learning_rate": 1.3559585680429536e-05, "loss": 0.6843, "step": 1991 }, { "epoch": 0.14023231256599789, "grad_norm": 2.2977802753448486, "learning_rate": 1.3559028301562676e-05, "loss": 0.8362, "step": 1992 }, { "epoch": 0.14030271031326996, "grad_norm": 2.161982536315918, "learning_rate": 1.3558470581685996e-05, "loss": 0.7367, "step": 1993 }, { "epoch": 0.14037310806054207, "grad_norm": 2.167020797729492, "learning_rate": 1.3557912520828488e-05, "loss": 0.7284, "step": 1994 }, { "epoch": 0.14044350580781415, "grad_norm": 2.2795302867889404, "learning_rate": 1.355735411901917e-05, "loss": 0.7943, "step": 1995 }, { "epoch": 0.14051390355508625, "grad_norm": 2.1784658432006836, "learning_rate": 1.3556795376287075e-05, "loss": 0.7391, "step": 1996 }, { "epoch": 0.14058430130235833, "grad_norm": 2.34366774559021, "learning_rate": 1.3556236292661248e-05, "loss": 0.8839, "step": 1997 }, { "epoch": 0.1406546990496304, "grad_norm": 2.379434108734131, "learning_rate": 1.3555676868170758e-05, "loss": 0.6602, "step": 1998 }, { "epoch": 0.1407250967969025, "grad_norm": 2.2524821758270264, "learning_rate": 1.3555117102844692e-05, "loss": 0.7744, "step": 1999 }, { "epoch": 0.1407954945441746, "grad_norm": 1.9590221643447876, "learning_rate": 1.3554556996712148e-05, "loss": 0.6463, "step": 2000 }, { "epoch": 0.14086589229144666, "grad_norm": 1.9231648445129395, "learning_rate": 1.3553996549802252e-05, "loss": 0.669, "step": 2001 }, { "epoch": 0.14093629003871877, "grad_norm": 2.07080078125, "learning_rate": 1.3553435762144141e-05, "loss": 0.6993, "step": 2002 }, { "epoch": 0.14100668778599085, "grad_norm": 1.9978841543197632, "learning_rate": 1.3552874633766967e-05, "loss": 0.7055, "step": 2003 }, { "epoch": 0.14107708553326292, "grad_norm": 2.1399142742156982, "learning_rate": 1.3552313164699908e-05, "loss": 0.6853, "step": 2004 }, { "epoch": 0.14114748328053503, "grad_norm": 1.893038034439087, "learning_rate": 1.3551751354972154e-05, "loss": 0.7835, "step": 2005 }, { "epoch": 0.1412178810278071, "grad_norm": 2.2094383239746094, "learning_rate": 1.3551189204612912e-05, "loss": 0.6954, "step": 2006 }, { "epoch": 0.14128827877507918, "grad_norm": 2.437511444091797, "learning_rate": 1.355062671365141e-05, "loss": 0.7354, "step": 2007 }, { "epoch": 0.1413586765223513, "grad_norm": 2.0128026008605957, "learning_rate": 1.3550063882116892e-05, "loss": 0.7522, "step": 2008 }, { "epoch": 0.14142907426962337, "grad_norm": 2.251699686050415, "learning_rate": 1.3549500710038624e-05, "loss": 0.7726, "step": 2009 }, { "epoch": 0.14149947201689547, "grad_norm": 2.379786252975464, "learning_rate": 1.354893719744588e-05, "loss": 0.7993, "step": 2010 }, { "epoch": 0.14156986976416755, "grad_norm": 2.4828431606292725, "learning_rate": 1.3548373344367962e-05, "loss": 0.8145, "step": 2011 }, { "epoch": 0.14164026751143963, "grad_norm": 1.9900507926940918, "learning_rate": 1.354780915083418e-05, "loss": 0.7595, "step": 2012 }, { "epoch": 0.14171066525871173, "grad_norm": 1.7197569608688354, "learning_rate": 1.3547244616873872e-05, "loss": 0.682, "step": 2013 }, { "epoch": 0.1417810630059838, "grad_norm": 2.0535356998443604, "learning_rate": 1.3546679742516386e-05, "loss": 0.7058, "step": 2014 }, { "epoch": 0.1418514607532559, "grad_norm": 2.3813140392303467, "learning_rate": 1.3546114527791092e-05, "loss": 0.7853, "step": 2015 }, { "epoch": 0.141921858500528, "grad_norm": 2.0817389488220215, "learning_rate": 1.3545548972727374e-05, "loss": 0.8953, "step": 2016 }, { "epoch": 0.14199225624780007, "grad_norm": 2.2234771251678467, "learning_rate": 1.354498307735464e-05, "loss": 0.8031, "step": 2017 }, { "epoch": 0.14206265399507215, "grad_norm": 2.1697850227355957, "learning_rate": 1.3544416841702305e-05, "loss": 0.8644, "step": 2018 }, { "epoch": 0.14213305174234425, "grad_norm": 2.5696005821228027, "learning_rate": 1.3543850265799814e-05, "loss": 0.8473, "step": 2019 }, { "epoch": 0.14220344948961633, "grad_norm": 2.407658338546753, "learning_rate": 1.3543283349676618e-05, "loss": 0.7064, "step": 2020 }, { "epoch": 0.1422738472368884, "grad_norm": 2.0372283458709717, "learning_rate": 1.3542716093362198e-05, "loss": 0.9041, "step": 2021 }, { "epoch": 0.1423442449841605, "grad_norm": 2.496581792831421, "learning_rate": 1.354214849688604e-05, "loss": 0.7259, "step": 2022 }, { "epoch": 0.1424146427314326, "grad_norm": 2.0073697566986084, "learning_rate": 1.3541580560277659e-05, "loss": 0.6762, "step": 2023 }, { "epoch": 0.1424850404787047, "grad_norm": 2.1163651943206787, "learning_rate": 1.3541012283566579e-05, "loss": 0.6834, "step": 2024 }, { "epoch": 0.14255543822597677, "grad_norm": 2.4263651371002197, "learning_rate": 1.3540443666782347e-05, "loss": 0.8514, "step": 2025 }, { "epoch": 0.14262583597324885, "grad_norm": 2.0010266304016113, "learning_rate": 1.3539874709954525e-05, "loss": 0.7415, "step": 2026 }, { "epoch": 0.14269623372052095, "grad_norm": 2.3978538513183594, "learning_rate": 1.3539305413112693e-05, "loss": 0.6778, "step": 2027 }, { "epoch": 0.14276663146779303, "grad_norm": 2.4727344512939453, "learning_rate": 1.3538735776286452e-05, "loss": 0.7823, "step": 2028 }, { "epoch": 0.1428370292150651, "grad_norm": 2.0970239639282227, "learning_rate": 1.3538165799505416e-05, "loss": 0.674, "step": 2029 }, { "epoch": 0.14290742696233721, "grad_norm": 2.273420572280884, "learning_rate": 1.3537595482799217e-05, "loss": 0.7748, "step": 2030 }, { "epoch": 0.1429778247096093, "grad_norm": 2.014585494995117, "learning_rate": 1.353702482619751e-05, "loss": 0.7278, "step": 2031 }, { "epoch": 0.14304822245688137, "grad_norm": 1.9579099416732788, "learning_rate": 1.3536453829729963e-05, "loss": 0.6075, "step": 2032 }, { "epoch": 0.14311862020415347, "grad_norm": 1.848496913909912, "learning_rate": 1.3535882493426261e-05, "loss": 0.7648, "step": 2033 }, { "epoch": 0.14318901795142555, "grad_norm": 1.948378324508667, "learning_rate": 1.3535310817316107e-05, "loss": 0.7598, "step": 2034 }, { "epoch": 0.14325941569869763, "grad_norm": 2.6666452884674072, "learning_rate": 1.3534738801429225e-05, "loss": 0.82, "step": 2035 }, { "epoch": 0.14332981344596973, "grad_norm": 2.0397098064422607, "learning_rate": 1.3534166445795355e-05, "loss": 0.8616, "step": 2036 }, { "epoch": 0.1434002111932418, "grad_norm": 1.5949040651321411, "learning_rate": 1.3533593750444255e-05, "loss": 0.9202, "step": 2037 }, { "epoch": 0.14347060894051392, "grad_norm": 2.1729824542999268, "learning_rate": 1.3533020715405696e-05, "loss": 0.7757, "step": 2038 }, { "epoch": 0.143541006687786, "grad_norm": 1.9796372652053833, "learning_rate": 1.3532447340709477e-05, "loss": 0.7831, "step": 2039 }, { "epoch": 0.14361140443505807, "grad_norm": 2.455202341079712, "learning_rate": 1.3531873626385404e-05, "loss": 0.7406, "step": 2040 }, { "epoch": 0.14368180218233018, "grad_norm": 2.6734743118286133, "learning_rate": 1.3531299572463303e-05, "loss": 0.753, "step": 2041 }, { "epoch": 0.14375219992960225, "grad_norm": 2.151664972305298, "learning_rate": 1.3530725178973024e-05, "loss": 0.7391, "step": 2042 }, { "epoch": 0.14382259767687433, "grad_norm": 1.8828901052474976, "learning_rate": 1.3530150445944428e-05, "loss": 0.6766, "step": 2043 }, { "epoch": 0.14389299542414644, "grad_norm": 2.1596450805664062, "learning_rate": 1.3529575373407397e-05, "loss": 0.7485, "step": 2044 }, { "epoch": 0.1439633931714185, "grad_norm": 2.6462273597717285, "learning_rate": 1.3528999961391827e-05, "loss": 0.7524, "step": 2045 }, { "epoch": 0.1440337909186906, "grad_norm": 2.6336793899536133, "learning_rate": 1.3528424209927637e-05, "loss": 0.8082, "step": 2046 }, { "epoch": 0.1441041886659627, "grad_norm": 2.2029147148132324, "learning_rate": 1.3527848119044759e-05, "loss": 0.6921, "step": 2047 }, { "epoch": 0.14417458641323477, "grad_norm": 2.041109800338745, "learning_rate": 1.3527271688773148e-05, "loss": 0.6488, "step": 2048 }, { "epoch": 0.14424498416050685, "grad_norm": 2.4333689212799072, "learning_rate": 1.3526694919142769e-05, "loss": 0.9409, "step": 2049 }, { "epoch": 0.14431538190777896, "grad_norm": 2.4270708560943604, "learning_rate": 1.352611781018361e-05, "loss": 0.714, "step": 2050 }, { "epoch": 0.14438577965505103, "grad_norm": 2.2437524795532227, "learning_rate": 1.3525540361925673e-05, "loss": 0.7448, "step": 2051 }, { "epoch": 0.14445617740232314, "grad_norm": 2.15356183052063, "learning_rate": 1.3524962574398986e-05, "loss": 0.7838, "step": 2052 }, { "epoch": 0.14452657514959522, "grad_norm": 2.0781919956207275, "learning_rate": 1.3524384447633585e-05, "loss": 0.7417, "step": 2053 }, { "epoch": 0.1445969728968673, "grad_norm": 1.841194987297058, "learning_rate": 1.3523805981659528e-05, "loss": 0.7822, "step": 2054 }, { "epoch": 0.1446673706441394, "grad_norm": 1.8723855018615723, "learning_rate": 1.3523227176506889e-05, "loss": 0.7409, "step": 2055 }, { "epoch": 0.14473776839141148, "grad_norm": 2.0017099380493164, "learning_rate": 1.3522648032205761e-05, "loss": 0.7193, "step": 2056 }, { "epoch": 0.14480816613868355, "grad_norm": 2.3983771800994873, "learning_rate": 1.3522068548786256e-05, "loss": 0.9911, "step": 2057 }, { "epoch": 0.14487856388595566, "grad_norm": 1.853318452835083, "learning_rate": 1.3521488726278499e-05, "loss": 0.8539, "step": 2058 }, { "epoch": 0.14494896163322774, "grad_norm": 2.0474300384521484, "learning_rate": 1.3520908564712637e-05, "loss": 0.7732, "step": 2059 }, { "epoch": 0.1450193593804998, "grad_norm": 2.006171226501465, "learning_rate": 1.3520328064118832e-05, "loss": 0.7238, "step": 2060 }, { "epoch": 0.14508975712777192, "grad_norm": 2.271385908126831, "learning_rate": 1.3519747224527268e-05, "loss": 0.7742, "step": 2061 }, { "epoch": 0.145160154875044, "grad_norm": 2.356719732284546, "learning_rate": 1.351916604596814e-05, "loss": 0.8425, "step": 2062 }, { "epoch": 0.14523055262231607, "grad_norm": 2.22481632232666, "learning_rate": 1.3518584528471665e-05, "loss": 0.7367, "step": 2063 }, { "epoch": 0.14530095036958818, "grad_norm": 2.057844400405884, "learning_rate": 1.3518002672068077e-05, "loss": 0.7517, "step": 2064 }, { "epoch": 0.14537134811686026, "grad_norm": 2.453312635421753, "learning_rate": 1.3517420476787628e-05, "loss": 0.7135, "step": 2065 }, { "epoch": 0.14544174586413236, "grad_norm": 2.274406909942627, "learning_rate": 1.3516837942660584e-05, "loss": 0.7223, "step": 2066 }, { "epoch": 0.14551214361140444, "grad_norm": 2.0773041248321533, "learning_rate": 1.3516255069717235e-05, "loss": 0.7632, "step": 2067 }, { "epoch": 0.14558254135867651, "grad_norm": 2.1532270908355713, "learning_rate": 1.3515671857987884e-05, "loss": 0.8107, "step": 2068 }, { "epoch": 0.14565293910594862, "grad_norm": 2.115966558456421, "learning_rate": 1.3515088307502851e-05, "loss": 0.7769, "step": 2069 }, { "epoch": 0.1457233368532207, "grad_norm": 2.081608772277832, "learning_rate": 1.3514504418292476e-05, "loss": 0.7526, "step": 2070 }, { "epoch": 0.14579373460049277, "grad_norm": 2.567854881286621, "learning_rate": 1.3513920190387117e-05, "loss": 0.7062, "step": 2071 }, { "epoch": 0.14586413234776488, "grad_norm": 2.0827255249023438, "learning_rate": 1.3513335623817147e-05, "loss": 0.8408, "step": 2072 }, { "epoch": 0.14593453009503696, "grad_norm": 2.1328165531158447, "learning_rate": 1.3512750718612961e-05, "loss": 0.7661, "step": 2073 }, { "epoch": 0.14600492784230903, "grad_norm": 2.272878885269165, "learning_rate": 1.3512165474804967e-05, "loss": 0.6991, "step": 2074 }, { "epoch": 0.14607532558958114, "grad_norm": 2.15435528755188, "learning_rate": 1.351157989242359e-05, "loss": 0.8042, "step": 2075 }, { "epoch": 0.14614572333685322, "grad_norm": 2.0213234424591064, "learning_rate": 1.3510993971499278e-05, "loss": 0.8067, "step": 2076 }, { "epoch": 0.1462161210841253, "grad_norm": 1.9892933368682861, "learning_rate": 1.3510407712062494e-05, "loss": 0.7274, "step": 2077 }, { "epoch": 0.1462865188313974, "grad_norm": 2.292405128479004, "learning_rate": 1.3509821114143713e-05, "loss": 0.7592, "step": 2078 }, { "epoch": 0.14635691657866948, "grad_norm": 2.3261070251464844, "learning_rate": 1.3509234177773443e-05, "loss": 0.6977, "step": 2079 }, { "epoch": 0.14642731432594158, "grad_norm": 1.928760051727295, "learning_rate": 1.3508646902982186e-05, "loss": 0.6311, "step": 2080 }, { "epoch": 0.14649771207321366, "grad_norm": 1.8416416645050049, "learning_rate": 1.3508059289800486e-05, "loss": 0.7095, "step": 2081 }, { "epoch": 0.14656810982048574, "grad_norm": 1.9654814004898071, "learning_rate": 1.350747133825889e-05, "loss": 0.5989, "step": 2082 }, { "epoch": 0.14663850756775784, "grad_norm": 2.245328903198242, "learning_rate": 1.3506883048387965e-05, "loss": 0.7371, "step": 2083 }, { "epoch": 0.14670890531502992, "grad_norm": 2.523977279663086, "learning_rate": 1.3506294420218297e-05, "loss": 0.8938, "step": 2084 }, { "epoch": 0.146779303062302, "grad_norm": 2.5129518508911133, "learning_rate": 1.3505705453780489e-05, "loss": 0.8447, "step": 2085 }, { "epoch": 0.1468497008095741, "grad_norm": 1.8679391145706177, "learning_rate": 1.3505116149105162e-05, "loss": 0.7379, "step": 2086 }, { "epoch": 0.14692009855684618, "grad_norm": 2.142808437347412, "learning_rate": 1.3504526506222958e-05, "loss": 0.6876, "step": 2087 }, { "epoch": 0.14699049630411826, "grad_norm": 2.4002599716186523, "learning_rate": 1.3503936525164527e-05, "loss": 0.915, "step": 2088 }, { "epoch": 0.14706089405139036, "grad_norm": 2.4998977184295654, "learning_rate": 1.3503346205960546e-05, "loss": 0.6752, "step": 2089 }, { "epoch": 0.14713129179866244, "grad_norm": 2.238266706466675, "learning_rate": 1.3502755548641706e-05, "loss": 0.8425, "step": 2090 }, { "epoch": 0.14720168954593452, "grad_norm": 1.77357017993927, "learning_rate": 1.3502164553238716e-05, "loss": 0.66, "step": 2091 }, { "epoch": 0.14727208729320662, "grad_norm": 2.413443088531494, "learning_rate": 1.3501573219782304e-05, "loss": 0.7532, "step": 2092 }, { "epoch": 0.1473424850404787, "grad_norm": 2.1906399726867676, "learning_rate": 1.3500981548303212e-05, "loss": 0.7516, "step": 2093 }, { "epoch": 0.1474128827877508, "grad_norm": 1.9895716905593872, "learning_rate": 1.35003895388322e-05, "loss": 0.6415, "step": 2094 }, { "epoch": 0.14748328053502288, "grad_norm": 2.1136491298675537, "learning_rate": 1.349979719140005e-05, "loss": 0.8296, "step": 2095 }, { "epoch": 0.14755367828229496, "grad_norm": 1.871099829673767, "learning_rate": 1.3499204506037556e-05, "loss": 0.8159, "step": 2096 }, { "epoch": 0.14762407602956706, "grad_norm": 2.1271631717681885, "learning_rate": 1.3498611482775534e-05, "loss": 0.8054, "step": 2097 }, { "epoch": 0.14769447377683914, "grad_norm": 2.406583309173584, "learning_rate": 1.3498018121644817e-05, "loss": 0.7479, "step": 2098 }, { "epoch": 0.14776487152411122, "grad_norm": 1.696142315864563, "learning_rate": 1.3497424422676252e-05, "loss": 0.7971, "step": 2099 }, { "epoch": 0.14783526927138332, "grad_norm": 2.1341099739074707, "learning_rate": 1.3496830385900707e-05, "loss": 0.8204, "step": 2100 }, { "epoch": 0.1479056670186554, "grad_norm": 2.368579864501953, "learning_rate": 1.3496236011349066e-05, "loss": 0.757, "step": 2101 }, { "epoch": 0.14797606476592748, "grad_norm": 4.759779453277588, "learning_rate": 1.3495641299052231e-05, "loss": 0.7203, "step": 2102 }, { "epoch": 0.14804646251319958, "grad_norm": 2.6825926303863525, "learning_rate": 1.3495046249041123e-05, "loss": 0.7415, "step": 2103 }, { "epoch": 0.14811686026047166, "grad_norm": 1.8315502405166626, "learning_rate": 1.3494450861346677e-05, "loss": 0.7361, "step": 2104 }, { "epoch": 0.14818725800774374, "grad_norm": 2.150681495666504, "learning_rate": 1.349385513599985e-05, "loss": 0.7164, "step": 2105 }, { "epoch": 0.14825765575501584, "grad_norm": 2.04199481010437, "learning_rate": 1.3493259073031614e-05, "loss": 0.8232, "step": 2106 }, { "epoch": 0.14832805350228792, "grad_norm": 2.420827627182007, "learning_rate": 1.3492662672472957e-05, "loss": 0.7505, "step": 2107 }, { "epoch": 0.14839845124956003, "grad_norm": 1.9811785221099854, "learning_rate": 1.3492065934354888e-05, "loss": 0.7801, "step": 2108 }, { "epoch": 0.1484688489968321, "grad_norm": 1.921058177947998, "learning_rate": 1.3491468858708431e-05, "loss": 0.7536, "step": 2109 }, { "epoch": 0.14853924674410418, "grad_norm": 2.300747871398926, "learning_rate": 1.349087144556463e-05, "loss": 0.8029, "step": 2110 }, { "epoch": 0.14860964449137629, "grad_norm": 1.9932628870010376, "learning_rate": 1.3490273694954543e-05, "loss": 0.7183, "step": 2111 }, { "epoch": 0.14868004223864836, "grad_norm": 2.3676726818084717, "learning_rate": 1.348967560690925e-05, "loss": 0.7235, "step": 2112 }, { "epoch": 0.14875043998592044, "grad_norm": 2.330206871032715, "learning_rate": 1.3489077181459843e-05, "loss": 0.7896, "step": 2113 }, { "epoch": 0.14882083773319255, "grad_norm": 1.9547412395477295, "learning_rate": 1.3488478418637438e-05, "loss": 0.7492, "step": 2114 }, { "epoch": 0.14889123548046462, "grad_norm": 1.8493527173995972, "learning_rate": 1.3487879318473163e-05, "loss": 0.8735, "step": 2115 }, { "epoch": 0.1489616332277367, "grad_norm": 1.8976885080337524, "learning_rate": 1.3487279880998167e-05, "loss": 0.6917, "step": 2116 }, { "epoch": 0.1490320309750088, "grad_norm": 2.3138837814331055, "learning_rate": 1.3486680106243613e-05, "loss": 0.7224, "step": 2117 }, { "epoch": 0.14910242872228088, "grad_norm": 2.1206371784210205, "learning_rate": 1.3486079994240687e-05, "loss": 0.7869, "step": 2118 }, { "epoch": 0.14917282646955296, "grad_norm": 1.9168246984481812, "learning_rate": 1.3485479545020588e-05, "loss": 0.8178, "step": 2119 }, { "epoch": 0.14924322421682507, "grad_norm": 2.4285600185394287, "learning_rate": 1.3484878758614534e-05, "loss": 0.7171, "step": 2120 }, { "epoch": 0.14931362196409714, "grad_norm": 2.0414702892303467, "learning_rate": 1.348427763505376e-05, "loss": 0.7704, "step": 2121 }, { "epoch": 0.14938401971136925, "grad_norm": 1.9084241390228271, "learning_rate": 1.3483676174369518e-05, "loss": 0.8843, "step": 2122 }, { "epoch": 0.14945441745864133, "grad_norm": 2.732853651046753, "learning_rate": 1.3483074376593081e-05, "loss": 0.8176, "step": 2123 }, { "epoch": 0.1495248152059134, "grad_norm": 2.675346851348877, "learning_rate": 1.3482472241755736e-05, "loss": 0.713, "step": 2124 }, { "epoch": 0.1495952129531855, "grad_norm": 2.0974748134613037, "learning_rate": 1.3481869769888789e-05, "loss": 0.7289, "step": 2125 }, { "epoch": 0.14966561070045759, "grad_norm": 2.0021018981933594, "learning_rate": 1.3481266961023563e-05, "loss": 0.7983, "step": 2126 }, { "epoch": 0.14973600844772966, "grad_norm": 5.459007263183594, "learning_rate": 1.3480663815191395e-05, "loss": 0.794, "step": 2127 }, { "epoch": 0.14980640619500177, "grad_norm": 1.8780583143234253, "learning_rate": 1.3480060332423647e-05, "loss": 0.8177, "step": 2128 }, { "epoch": 0.14987680394227385, "grad_norm": 1.9384963512420654, "learning_rate": 1.3479456512751693e-05, "loss": 0.7537, "step": 2129 }, { "epoch": 0.14994720168954592, "grad_norm": 2.3309268951416016, "learning_rate": 1.3478852356206929e-05, "loss": 0.7708, "step": 2130 }, { "epoch": 0.15001759943681803, "grad_norm": 2.0053508281707764, "learning_rate": 1.3478247862820762e-05, "loss": 0.7254, "step": 2131 }, { "epoch": 0.1500879971840901, "grad_norm": 2.656656503677368, "learning_rate": 1.3477643032624621e-05, "loss": 0.7553, "step": 2132 }, { "epoch": 0.1501583949313622, "grad_norm": 2.056612491607666, "learning_rate": 1.3477037865649952e-05, "loss": 0.6966, "step": 2133 }, { "epoch": 0.1502287926786343, "grad_norm": 2.0227396488189697, "learning_rate": 1.3476432361928221e-05, "loss": 0.6852, "step": 2134 }, { "epoch": 0.15029919042590636, "grad_norm": 2.26312255859375, "learning_rate": 1.3475826521490904e-05, "loss": 0.8027, "step": 2135 }, { "epoch": 0.15036958817317847, "grad_norm": 2.1140329837799072, "learning_rate": 1.3475220344369502e-05, "loss": 0.7035, "step": 2136 }, { "epoch": 0.15043998592045055, "grad_norm": 2.215404987335205, "learning_rate": 1.347461383059553e-05, "loss": 0.7965, "step": 2137 }, { "epoch": 0.15051038366772262, "grad_norm": 2.0065503120422363, "learning_rate": 1.347400698020052e-05, "loss": 0.7142, "step": 2138 }, { "epoch": 0.15058078141499473, "grad_norm": 1.9836153984069824, "learning_rate": 1.3473399793216024e-05, "loss": 0.7518, "step": 2139 }, { "epoch": 0.1506511791622668, "grad_norm": 2.292107343673706, "learning_rate": 1.3472792269673612e-05, "loss": 0.8337, "step": 2140 }, { "epoch": 0.15072157690953888, "grad_norm": 3.4108846187591553, "learning_rate": 1.3472184409604866e-05, "loss": 0.672, "step": 2141 }, { "epoch": 0.150791974656811, "grad_norm": 2.554661512374878, "learning_rate": 1.3471576213041393e-05, "loss": 0.8581, "step": 2142 }, { "epoch": 0.15086237240408307, "grad_norm": 2.2160611152648926, "learning_rate": 1.3470967680014811e-05, "loss": 0.7696, "step": 2143 }, { "epoch": 0.15093277015135514, "grad_norm": 2.0324320793151855, "learning_rate": 1.3470358810556757e-05, "loss": 0.7195, "step": 2144 }, { "epoch": 0.15100316789862725, "grad_norm": 2.519993782043457, "learning_rate": 1.346974960469889e-05, "loss": 0.702, "step": 2145 }, { "epoch": 0.15107356564589933, "grad_norm": 2.2141342163085938, "learning_rate": 1.3469140062472882e-05, "loss": 0.674, "step": 2146 }, { "epoch": 0.15114396339317143, "grad_norm": 2.0812056064605713, "learning_rate": 1.346853018391042e-05, "loss": 0.7779, "step": 2147 }, { "epoch": 0.1512143611404435, "grad_norm": 2.650134563446045, "learning_rate": 1.346791996904322e-05, "loss": 0.7948, "step": 2148 }, { "epoch": 0.1512847588877156, "grad_norm": 2.4188122749328613, "learning_rate": 1.3467309417903002e-05, "loss": 0.7203, "step": 2149 }, { "epoch": 0.1513551566349877, "grad_norm": 1.969528317451477, "learning_rate": 1.3466698530521508e-05, "loss": 0.7485, "step": 2150 }, { "epoch": 0.15142555438225977, "grad_norm": 2.253983974456787, "learning_rate": 1.3466087306930504e-05, "loss": 0.724, "step": 2151 }, { "epoch": 0.15149595212953185, "grad_norm": 2.2035083770751953, "learning_rate": 1.3465475747161762e-05, "loss": 0.784, "step": 2152 }, { "epoch": 0.15156634987680395, "grad_norm": 2.592100143432617, "learning_rate": 1.3464863851247083e-05, "loss": 0.7148, "step": 2153 }, { "epoch": 0.15163674762407603, "grad_norm": 2.2732951641082764, "learning_rate": 1.3464251619218274e-05, "loss": 0.8334, "step": 2154 }, { "epoch": 0.1517071453713481, "grad_norm": 2.508040189743042, "learning_rate": 1.3463639051107172e-05, "loss": 0.8046, "step": 2155 }, { "epoch": 0.1517775431186202, "grad_norm": 2.2785561084747314, "learning_rate": 1.3463026146945621e-05, "loss": 0.6839, "step": 2156 }, { "epoch": 0.1518479408658923, "grad_norm": 2.246443033218384, "learning_rate": 1.3462412906765488e-05, "loss": 0.7419, "step": 2157 }, { "epoch": 0.15191833861316437, "grad_norm": 2.75984525680542, "learning_rate": 1.3461799330598654e-05, "loss": 0.6685, "step": 2158 }, { "epoch": 0.15198873636043647, "grad_norm": 2.513911008834839, "learning_rate": 1.346118541847702e-05, "loss": 0.8143, "step": 2159 }, { "epoch": 0.15205913410770855, "grad_norm": 2.6519956588745117, "learning_rate": 1.3460571170432507e-05, "loss": 0.7598, "step": 2160 }, { "epoch": 0.15212953185498065, "grad_norm": 2.294637441635132, "learning_rate": 1.3459956586497047e-05, "loss": 0.7726, "step": 2161 }, { "epoch": 0.15219992960225273, "grad_norm": 2.025163173675537, "learning_rate": 1.3459341666702595e-05, "loss": 0.8848, "step": 2162 }, { "epoch": 0.1522703273495248, "grad_norm": 2.119497299194336, "learning_rate": 1.3458726411081117e-05, "loss": 0.6976, "step": 2163 }, { "epoch": 0.15234072509679691, "grad_norm": 1.855493426322937, "learning_rate": 1.3458110819664604e-05, "loss": 0.8753, "step": 2164 }, { "epoch": 0.152411122844069, "grad_norm": 2.4184579849243164, "learning_rate": 1.3457494892485062e-05, "loss": 0.7751, "step": 2165 }, { "epoch": 0.15248152059134107, "grad_norm": 2.121934652328491, "learning_rate": 1.345687862957451e-05, "loss": 0.7568, "step": 2166 }, { "epoch": 0.15255191833861317, "grad_norm": 2.356076240539551, "learning_rate": 1.3456262030964993e-05, "loss": 0.7548, "step": 2167 }, { "epoch": 0.15262231608588525, "grad_norm": 2.0751259326934814, "learning_rate": 1.3455645096688564e-05, "loss": 0.7796, "step": 2168 }, { "epoch": 0.15269271383315733, "grad_norm": 2.3328893184661865, "learning_rate": 1.3455027826777303e-05, "loss": 0.815, "step": 2169 }, { "epoch": 0.15276311158042943, "grad_norm": 2.4695825576782227, "learning_rate": 1.3454410221263293e-05, "loss": 0.7275, "step": 2170 }, { "epoch": 0.1528335093277015, "grad_norm": 2.650050401687622, "learning_rate": 1.3453792280178656e-05, "loss": 0.7968, "step": 2171 }, { "epoch": 0.1529039070749736, "grad_norm": 1.8130273818969727, "learning_rate": 1.3453174003555511e-05, "loss": 0.967, "step": 2172 }, { "epoch": 0.1529743048222457, "grad_norm": 2.562685489654541, "learning_rate": 1.3452555391426006e-05, "loss": 0.7806, "step": 2173 }, { "epoch": 0.15304470256951777, "grad_norm": 2.0534095764160156, "learning_rate": 1.3451936443822304e-05, "loss": 0.732, "step": 2174 }, { "epoch": 0.15311510031678988, "grad_norm": 1.8753949403762817, "learning_rate": 1.3451317160776578e-05, "loss": 0.6831, "step": 2175 }, { "epoch": 0.15318549806406195, "grad_norm": 1.9486125707626343, "learning_rate": 1.3450697542321035e-05, "loss": 0.6278, "step": 2176 }, { "epoch": 0.15325589581133403, "grad_norm": 2.0178604125976562, "learning_rate": 1.3450077588487882e-05, "loss": 0.9266, "step": 2177 }, { "epoch": 0.15332629355860614, "grad_norm": 3.1256306171417236, "learning_rate": 1.3449457299309354e-05, "loss": 0.7522, "step": 2178 }, { "epoch": 0.1533966913058782, "grad_norm": 2.821002244949341, "learning_rate": 1.3448836674817702e-05, "loss": 0.7076, "step": 2179 }, { "epoch": 0.1534670890531503, "grad_norm": 2.4704196453094482, "learning_rate": 1.3448215715045189e-05, "loss": 0.7397, "step": 2180 }, { "epoch": 0.1535374868004224, "grad_norm": 2.1623754501342773, "learning_rate": 1.3447594420024101e-05, "loss": 0.6148, "step": 2181 }, { "epoch": 0.15360788454769447, "grad_norm": 2.4597678184509277, "learning_rate": 1.3446972789786742e-05, "loss": 0.7348, "step": 2182 }, { "epoch": 0.15367828229496655, "grad_norm": 2.5130789279937744, "learning_rate": 1.3446350824365426e-05, "loss": 0.776, "step": 2183 }, { "epoch": 0.15374868004223866, "grad_norm": 2.5441150665283203, "learning_rate": 1.3445728523792495e-05, "loss": 0.7392, "step": 2184 }, { "epoch": 0.15381907778951073, "grad_norm": 2.057218074798584, "learning_rate": 1.34451058881003e-05, "loss": 0.6171, "step": 2185 }, { "epoch": 0.1538894755367828, "grad_norm": 2.3416008949279785, "learning_rate": 1.3444482917321212e-05, "loss": 0.7609, "step": 2186 }, { "epoch": 0.15395987328405492, "grad_norm": 1.9904509782791138, "learning_rate": 1.344385961148762e-05, "loss": 0.756, "step": 2187 }, { "epoch": 0.154030271031327, "grad_norm": 2.3506217002868652, "learning_rate": 1.3443235970631932e-05, "loss": 0.7757, "step": 2188 }, { "epoch": 0.1541006687785991, "grad_norm": 2.0688605308532715, "learning_rate": 1.344261199478657e-05, "loss": 0.6806, "step": 2189 }, { "epoch": 0.15417106652587118, "grad_norm": 3.3171520233154297, "learning_rate": 1.3441987683983976e-05, "loss": 0.7951, "step": 2190 }, { "epoch": 0.15424146427314325, "grad_norm": 2.2091095447540283, "learning_rate": 1.344136303825661e-05, "loss": 0.7331, "step": 2191 }, { "epoch": 0.15431186202041536, "grad_norm": 2.05800199508667, "learning_rate": 1.344073805763694e-05, "loss": 0.7561, "step": 2192 }, { "epoch": 0.15438225976768744, "grad_norm": 2.123255729675293, "learning_rate": 1.3440112742157471e-05, "loss": 0.8071, "step": 2193 }, { "epoch": 0.1544526575149595, "grad_norm": 2.015413522720337, "learning_rate": 1.3439487091850706e-05, "loss": 0.7592, "step": 2194 }, { "epoch": 0.15452305526223162, "grad_norm": 2.4797780513763428, "learning_rate": 1.3438861106749176e-05, "loss": 0.8585, "step": 2195 }, { "epoch": 0.1545934530095037, "grad_norm": 1.9205869436264038, "learning_rate": 1.3438234786885425e-05, "loss": 0.785, "step": 2196 }, { "epoch": 0.15466385075677577, "grad_norm": 2.130974292755127, "learning_rate": 1.3437608132292016e-05, "loss": 0.7876, "step": 2197 }, { "epoch": 0.15473424850404788, "grad_norm": 2.844980239868164, "learning_rate": 1.3436981143001532e-05, "loss": 0.814, "step": 2198 }, { "epoch": 0.15480464625131996, "grad_norm": 1.8679320812225342, "learning_rate": 1.3436353819046566e-05, "loss": 0.7283, "step": 2199 }, { "epoch": 0.15487504399859203, "grad_norm": 2.1953063011169434, "learning_rate": 1.3435726160459738e-05, "loss": 0.6428, "step": 2200 }, { "epoch": 0.15494544174586414, "grad_norm": 2.0487847328186035, "learning_rate": 1.3435098167273676e-05, "loss": 0.6696, "step": 2201 }, { "epoch": 0.15501583949313621, "grad_norm": 2.10386061668396, "learning_rate": 1.3434469839521036e-05, "loss": 0.6764, "step": 2202 }, { "epoch": 0.15508623724040832, "grad_norm": 2.0979483127593994, "learning_rate": 1.3433841177234479e-05, "loss": 0.764, "step": 2203 }, { "epoch": 0.1551566349876804, "grad_norm": 2.217282295227051, "learning_rate": 1.3433212180446693e-05, "loss": 0.6789, "step": 2204 }, { "epoch": 0.15522703273495247, "grad_norm": 2.3624212741851807, "learning_rate": 1.3432582849190379e-05, "loss": 0.7321, "step": 2205 }, { "epoch": 0.15529743048222458, "grad_norm": 2.4162209033966064, "learning_rate": 1.3431953183498257e-05, "loss": 0.715, "step": 2206 }, { "epoch": 0.15536782822949666, "grad_norm": 2.024955987930298, "learning_rate": 1.3431323183403066e-05, "loss": 0.7963, "step": 2207 }, { "epoch": 0.15543822597676873, "grad_norm": 2.0557663440704346, "learning_rate": 1.3430692848937555e-05, "loss": 0.7879, "step": 2208 }, { "epoch": 0.15550862372404084, "grad_norm": 3.1832854747772217, "learning_rate": 1.34300621801345e-05, "loss": 0.7804, "step": 2209 }, { "epoch": 0.15557902147131292, "grad_norm": 2.731889486312866, "learning_rate": 1.3429431177026691e-05, "loss": 0.7807, "step": 2210 }, { "epoch": 0.155649419218585, "grad_norm": 2.2495880126953125, "learning_rate": 1.342879983964693e-05, "loss": 0.6807, "step": 2211 }, { "epoch": 0.1557198169658571, "grad_norm": 2.3592417240142822, "learning_rate": 1.3428168168028044e-05, "loss": 0.7953, "step": 2212 }, { "epoch": 0.15579021471312918, "grad_norm": 1.8323405981063843, "learning_rate": 1.342753616220287e-05, "loss": 0.7125, "step": 2213 }, { "epoch": 0.15586061246040125, "grad_norm": 1.968479871749878, "learning_rate": 1.3426903822204274e-05, "loss": 0.7996, "step": 2214 }, { "epoch": 0.15593101020767336, "grad_norm": 2.2477004528045654, "learning_rate": 1.3426271148065126e-05, "loss": 0.7637, "step": 2215 }, { "epoch": 0.15600140795494544, "grad_norm": 1.9478635787963867, "learning_rate": 1.3425638139818322e-05, "loss": 0.7581, "step": 2216 }, { "epoch": 0.15607180570221754, "grad_norm": 2.189944267272949, "learning_rate": 1.342500479749677e-05, "loss": 0.6811, "step": 2217 }, { "epoch": 0.15614220344948962, "grad_norm": 2.154630661010742, "learning_rate": 1.3424371121133401e-05, "loss": 0.8533, "step": 2218 }, { "epoch": 0.1562126011967617, "grad_norm": 1.8565149307250977, "learning_rate": 1.342373711076116e-05, "loss": 0.7078, "step": 2219 }, { "epoch": 0.1562829989440338, "grad_norm": 2.362338066101074, "learning_rate": 1.3423102766413006e-05, "loss": 0.8049, "step": 2220 }, { "epoch": 0.15635339669130588, "grad_norm": 2.048003911972046, "learning_rate": 1.3422468088121926e-05, "loss": 0.8115, "step": 2221 }, { "epoch": 0.15642379443857796, "grad_norm": 1.9707751274108887, "learning_rate": 1.3421833075920911e-05, "loss": 0.6435, "step": 2222 }, { "epoch": 0.15649419218585006, "grad_norm": 2.0584957599639893, "learning_rate": 1.3421197729842979e-05, "loss": 0.7382, "step": 2223 }, { "epoch": 0.15656458993312214, "grad_norm": 2.646688938140869, "learning_rate": 1.3420562049921162e-05, "loss": 0.8087, "step": 2224 }, { "epoch": 0.15663498768039422, "grad_norm": 2.012413501739502, "learning_rate": 1.3419926036188506e-05, "loss": 0.8742, "step": 2225 }, { "epoch": 0.15670538542766632, "grad_norm": 2.937718391418457, "learning_rate": 1.3419289688678085e-05, "loss": 0.6868, "step": 2226 }, { "epoch": 0.1567757831749384, "grad_norm": 2.1267971992492676, "learning_rate": 1.3418653007422977e-05, "loss": 0.8054, "step": 2227 }, { "epoch": 0.15684618092221048, "grad_norm": 2.001481294631958, "learning_rate": 1.3418015992456289e-05, "loss": 0.7436, "step": 2228 }, { "epoch": 0.15691657866948258, "grad_norm": 2.16641902923584, "learning_rate": 1.3417378643811137e-05, "loss": 0.7993, "step": 2229 }, { "epoch": 0.15698697641675466, "grad_norm": 2.2185988426208496, "learning_rate": 1.3416740961520656e-05, "loss": 0.6478, "step": 2230 }, { "epoch": 0.15705737416402676, "grad_norm": 2.1004598140716553, "learning_rate": 1.3416102945617998e-05, "loss": 0.7673, "step": 2231 }, { "epoch": 0.15712777191129884, "grad_norm": 1.9330203533172607, "learning_rate": 1.3415464596136342e-05, "loss": 0.685, "step": 2232 }, { "epoch": 0.15719816965857092, "grad_norm": 3.6383349895477295, "learning_rate": 1.3414825913108868e-05, "loss": 0.7829, "step": 2233 }, { "epoch": 0.15726856740584302, "grad_norm": 2.0717153549194336, "learning_rate": 1.3414186896568789e-05, "loss": 0.7173, "step": 2234 }, { "epoch": 0.1573389651531151, "grad_norm": 2.290285587310791, "learning_rate": 1.3413547546549322e-05, "loss": 0.7528, "step": 2235 }, { "epoch": 0.15740936290038718, "grad_norm": 2.3462352752685547, "learning_rate": 1.3412907863083707e-05, "loss": 0.7825, "step": 2236 }, { "epoch": 0.15747976064765928, "grad_norm": 2.1603755950927734, "learning_rate": 1.3412267846205208e-05, "loss": 0.707, "step": 2237 }, { "epoch": 0.15755015839493136, "grad_norm": 1.8994009494781494, "learning_rate": 1.3411627495947095e-05, "loss": 0.7562, "step": 2238 }, { "epoch": 0.15762055614220344, "grad_norm": 2.3450326919555664, "learning_rate": 1.3410986812342663e-05, "loss": 0.7931, "step": 2239 }, { "epoch": 0.15769095388947554, "grad_norm": 2.3198678493499756, "learning_rate": 1.341034579542522e-05, "loss": 0.8035, "step": 2240 }, { "epoch": 0.15776135163674762, "grad_norm": 1.978468656539917, "learning_rate": 1.340970444522809e-05, "loss": 0.7396, "step": 2241 }, { "epoch": 0.1578317493840197, "grad_norm": 2.1059083938598633, "learning_rate": 1.3409062761784625e-05, "loss": 0.703, "step": 2242 }, { "epoch": 0.1579021471312918, "grad_norm": 2.486313581466675, "learning_rate": 1.3408420745128184e-05, "loss": 0.6329, "step": 2243 }, { "epoch": 0.15797254487856388, "grad_norm": 2.99828839302063, "learning_rate": 1.3407778395292139e-05, "loss": 0.7217, "step": 2244 }, { "epoch": 0.15804294262583599, "grad_norm": 2.048733949661255, "learning_rate": 1.3407135712309897e-05, "loss": 0.7864, "step": 2245 }, { "epoch": 0.15811334037310806, "grad_norm": 2.0664103031158447, "learning_rate": 1.3406492696214864e-05, "loss": 0.8935, "step": 2246 }, { "epoch": 0.15818373812038014, "grad_norm": 2.1033244132995605, "learning_rate": 1.3405849347040475e-05, "loss": 0.7721, "step": 2247 }, { "epoch": 0.15825413586765225, "grad_norm": 1.955871820449829, "learning_rate": 1.3405205664820177e-05, "loss": 0.739, "step": 2248 }, { "epoch": 0.15832453361492432, "grad_norm": 5.485831260681152, "learning_rate": 1.3404561649587433e-05, "loss": 0.6948, "step": 2249 }, { "epoch": 0.1583949313621964, "grad_norm": 2.0183870792388916, "learning_rate": 1.340391730137573e-05, "loss": 0.7003, "step": 2250 }, { "epoch": 0.1584653291094685, "grad_norm": 1.9296573400497437, "learning_rate": 1.3403272620218569e-05, "loss": 0.6879, "step": 2251 }, { "epoch": 0.15853572685674058, "grad_norm": 2.3384599685668945, "learning_rate": 1.3402627606149464e-05, "loss": 0.8641, "step": 2252 }, { "epoch": 0.15860612460401266, "grad_norm": 2.263939380645752, "learning_rate": 1.3401982259201952e-05, "loss": 0.7736, "step": 2253 }, { "epoch": 0.15867652235128477, "grad_norm": 1.8373514413833618, "learning_rate": 1.340133657940958e-05, "loss": 0.7518, "step": 2254 }, { "epoch": 0.15874692009855684, "grad_norm": 3.5653927326202393, "learning_rate": 1.3400690566805927e-05, "loss": 0.7721, "step": 2255 }, { "epoch": 0.15881731784582892, "grad_norm": 2.4890785217285156, "learning_rate": 1.3400044221424574e-05, "loss": 0.7006, "step": 2256 }, { "epoch": 0.15888771559310103, "grad_norm": 2.1866347789764404, "learning_rate": 1.3399397543299124e-05, "loss": 0.6805, "step": 2257 }, { "epoch": 0.1589581133403731, "grad_norm": 2.399622917175293, "learning_rate": 1.33987505324632e-05, "loss": 0.6942, "step": 2258 }, { "epoch": 0.1590285110876452, "grad_norm": 2.1519815921783447, "learning_rate": 1.3398103188950441e-05, "loss": 0.7403, "step": 2259 }, { "epoch": 0.15909890883491729, "grad_norm": 2.3854563236236572, "learning_rate": 1.3397455512794502e-05, "loss": 0.735, "step": 2260 }, { "epoch": 0.15916930658218936, "grad_norm": 2.7952616214752197, "learning_rate": 1.339680750402906e-05, "loss": 0.7499, "step": 2261 }, { "epoch": 0.15923970432946147, "grad_norm": 2.271498203277588, "learning_rate": 1.3396159162687799e-05, "loss": 0.8105, "step": 2262 }, { "epoch": 0.15931010207673355, "grad_norm": 2.264178991317749, "learning_rate": 1.3395510488804431e-05, "loss": 0.7843, "step": 2263 }, { "epoch": 0.15938049982400562, "grad_norm": 3.0443201065063477, "learning_rate": 1.3394861482412683e-05, "loss": 0.6566, "step": 2264 }, { "epoch": 0.15945089757127773, "grad_norm": 2.3530914783477783, "learning_rate": 1.3394212143546293e-05, "loss": 0.6824, "step": 2265 }, { "epoch": 0.1595212953185498, "grad_norm": 2.1352293491363525, "learning_rate": 1.3393562472239024e-05, "loss": 0.8303, "step": 2266 }, { "epoch": 0.15959169306582188, "grad_norm": 2.3257193565368652, "learning_rate": 1.3392912468524651e-05, "loss": 0.7825, "step": 2267 }, { "epoch": 0.159662090813094, "grad_norm": 1.9754266738891602, "learning_rate": 1.339226213243697e-05, "loss": 0.6425, "step": 2268 }, { "epoch": 0.15973248856036606, "grad_norm": 1.8860704898834229, "learning_rate": 1.339161146400979e-05, "loss": 0.8461, "step": 2269 }, { "epoch": 0.15980288630763814, "grad_norm": 2.1612155437469482, "learning_rate": 1.3390960463276942e-05, "loss": 0.8282, "step": 2270 }, { "epoch": 0.15987328405491025, "grad_norm": 2.236626625061035, "learning_rate": 1.339030913027227e-05, "loss": 0.7745, "step": 2271 }, { "epoch": 0.15994368180218232, "grad_norm": 2.0775482654571533, "learning_rate": 1.3389657465029642e-05, "loss": 0.6815, "step": 2272 }, { "epoch": 0.16001407954945443, "grad_norm": 2.5543296337127686, "learning_rate": 1.3389005467582935e-05, "loss": 0.8643, "step": 2273 }, { "epoch": 0.1600844772967265, "grad_norm": 2.5123369693756104, "learning_rate": 1.3388353137966045e-05, "loss": 0.8307, "step": 2274 }, { "epoch": 0.16015487504399858, "grad_norm": 2.1633782386779785, "learning_rate": 1.3387700476212891e-05, "loss": 0.7254, "step": 2275 }, { "epoch": 0.1602252727912707, "grad_norm": 2.167743444442749, "learning_rate": 1.3387047482357404e-05, "loss": 0.8127, "step": 2276 }, { "epoch": 0.16029567053854277, "grad_norm": 2.1557769775390625, "learning_rate": 1.3386394156433536e-05, "loss": 0.7612, "step": 2277 }, { "epoch": 0.16036606828581484, "grad_norm": 2.0518486499786377, "learning_rate": 1.338574049847525e-05, "loss": 0.7088, "step": 2278 }, { "epoch": 0.16043646603308695, "grad_norm": 2.3461802005767822, "learning_rate": 1.3385086508516531e-05, "loss": 0.8245, "step": 2279 }, { "epoch": 0.16050686378035903, "grad_norm": 2.3803043365478516, "learning_rate": 1.3384432186591385e-05, "loss": 0.7421, "step": 2280 }, { "epoch": 0.1605772615276311, "grad_norm": 1.9778993129730225, "learning_rate": 1.3383777532733826e-05, "loss": 0.8539, "step": 2281 }, { "epoch": 0.1606476592749032, "grad_norm": 2.046851873397827, "learning_rate": 1.3383122546977891e-05, "loss": 0.6805, "step": 2282 }, { "epoch": 0.1607180570221753, "grad_norm": 2.5133917331695557, "learning_rate": 1.3382467229357634e-05, "loss": 0.6588, "step": 2283 }, { "epoch": 0.16078845476944736, "grad_norm": 2.1003623008728027, "learning_rate": 1.3381811579907128e-05, "loss": 0.6426, "step": 2284 }, { "epoch": 0.16085885251671947, "grad_norm": 2.269505500793457, "learning_rate": 1.3381155598660454e-05, "loss": 0.7446, "step": 2285 }, { "epoch": 0.16092925026399155, "grad_norm": 2.2276012897491455, "learning_rate": 1.3380499285651723e-05, "loss": 0.6928, "step": 2286 }, { "epoch": 0.16099964801126365, "grad_norm": 2.2092926502227783, "learning_rate": 1.3379842640915057e-05, "loss": 0.7758, "step": 2287 }, { "epoch": 0.16107004575853573, "grad_norm": 2.1171512603759766, "learning_rate": 1.3379185664484592e-05, "loss": 0.7819, "step": 2288 }, { "epoch": 0.1611404435058078, "grad_norm": 2.269700288772583, "learning_rate": 1.3378528356394488e-05, "loss": 0.7713, "step": 2289 }, { "epoch": 0.1612108412530799, "grad_norm": 2.1459641456604004, "learning_rate": 1.3377870716678918e-05, "loss": 0.7447, "step": 2290 }, { "epoch": 0.161281239000352, "grad_norm": 1.8733084201812744, "learning_rate": 1.3377212745372074e-05, "loss": 0.7936, "step": 2291 }, { "epoch": 0.16135163674762407, "grad_norm": 2.2887446880340576, "learning_rate": 1.3376554442508163e-05, "loss": 0.8308, "step": 2292 }, { "epoch": 0.16142203449489617, "grad_norm": 2.080447196960449, "learning_rate": 1.3375895808121412e-05, "loss": 0.7351, "step": 2293 }, { "epoch": 0.16149243224216825, "grad_norm": 2.197416305541992, "learning_rate": 1.3375236842246063e-05, "loss": 0.7453, "step": 2294 }, { "epoch": 0.16156282998944033, "grad_norm": 2.067242383956909, "learning_rate": 1.3374577544916378e-05, "loss": 0.7766, "step": 2295 }, { "epoch": 0.16163322773671243, "grad_norm": 2.0386738777160645, "learning_rate": 1.3373917916166632e-05, "loss": 0.6798, "step": 2296 }, { "epoch": 0.1617036254839845, "grad_norm": 1.988314151763916, "learning_rate": 1.3373257956031122e-05, "loss": 0.8025, "step": 2297 }, { "epoch": 0.1617740232312566, "grad_norm": 2.455324649810791, "learning_rate": 1.3372597664544159e-05, "loss": 0.8314, "step": 2298 }, { "epoch": 0.1618444209785287, "grad_norm": 2.0391433238983154, "learning_rate": 1.337193704174007e-05, "loss": 0.841, "step": 2299 }, { "epoch": 0.16191481872580077, "grad_norm": 1.9973831176757812, "learning_rate": 1.3371276087653205e-05, "loss": 0.7076, "step": 2300 }, { "epoch": 0.16198521647307287, "grad_norm": 2.609011650085449, "learning_rate": 1.3370614802317926e-05, "loss": 0.8251, "step": 2301 }, { "epoch": 0.16205561422034495, "grad_norm": 2.0627872943878174, "learning_rate": 1.3369953185768614e-05, "loss": 0.7732, "step": 2302 }, { "epoch": 0.16212601196761703, "grad_norm": 2.197601318359375, "learning_rate": 1.3369291238039666e-05, "loss": 0.7507, "step": 2303 }, { "epoch": 0.16219640971488913, "grad_norm": 2.254216432571411, "learning_rate": 1.33686289591655e-05, "loss": 0.7857, "step": 2304 }, { "epoch": 0.1622668074621612, "grad_norm": 2.0232017040252686, "learning_rate": 1.3367966349180545e-05, "loss": 0.7182, "step": 2305 }, { "epoch": 0.1623372052094333, "grad_norm": 2.4330008029937744, "learning_rate": 1.3367303408119252e-05, "loss": 0.6904, "step": 2306 }, { "epoch": 0.1624076029567054, "grad_norm": 2.12690806388855, "learning_rate": 1.336664013601609e-05, "loss": 0.8127, "step": 2307 }, { "epoch": 0.16247800070397747, "grad_norm": 2.2876992225646973, "learning_rate": 1.336597653290554e-05, "loss": 0.7146, "step": 2308 }, { "epoch": 0.16254839845124955, "grad_norm": 2.1194303035736084, "learning_rate": 1.3365312598822104e-05, "loss": 0.7191, "step": 2309 }, { "epoch": 0.16261879619852165, "grad_norm": 2.3384408950805664, "learning_rate": 1.3364648333800304e-05, "loss": 0.6689, "step": 2310 }, { "epoch": 0.16268919394579373, "grad_norm": 2.0537619590759277, "learning_rate": 1.3363983737874669e-05, "loss": 0.7001, "step": 2311 }, { "epoch": 0.1627595916930658, "grad_norm": 2.058896541595459, "learning_rate": 1.336331881107976e-05, "loss": 0.7524, "step": 2312 }, { "epoch": 0.1628299894403379, "grad_norm": 2.3189120292663574, "learning_rate": 1.336265355345014e-05, "loss": 0.7763, "step": 2313 }, { "epoch": 0.16290038718761, "grad_norm": 2.5794613361358643, "learning_rate": 1.3361987965020401e-05, "loss": 0.8221, "step": 2314 }, { "epoch": 0.1629707849348821, "grad_norm": 2.224811315536499, "learning_rate": 1.3361322045825145e-05, "loss": 0.6423, "step": 2315 }, { "epoch": 0.16304118268215417, "grad_norm": 2.902578353881836, "learning_rate": 1.3360655795898997e-05, "loss": 0.7404, "step": 2316 }, { "epoch": 0.16311158042942625, "grad_norm": 2.059739351272583, "learning_rate": 1.3359989215276592e-05, "loss": 0.7916, "step": 2317 }, { "epoch": 0.16318197817669836, "grad_norm": 2.0754635334014893, "learning_rate": 1.3359322303992587e-05, "loss": 0.8201, "step": 2318 }, { "epoch": 0.16325237592397043, "grad_norm": 2.681429386138916, "learning_rate": 1.3358655062081655e-05, "loss": 0.8236, "step": 2319 }, { "epoch": 0.1633227736712425, "grad_norm": 2.0360195636749268, "learning_rate": 1.3357987489578492e-05, "loss": 0.7576, "step": 2320 }, { "epoch": 0.16339317141851462, "grad_norm": 1.8847503662109375, "learning_rate": 1.3357319586517797e-05, "loss": 0.8359, "step": 2321 }, { "epoch": 0.1634635691657867, "grad_norm": 1.9458824396133423, "learning_rate": 1.33566513529343e-05, "loss": 0.8049, "step": 2322 }, { "epoch": 0.16353396691305877, "grad_norm": 2.267979383468628, "learning_rate": 1.3355982788862744e-05, "loss": 0.6165, "step": 2323 }, { "epoch": 0.16360436466033088, "grad_norm": 1.9858193397521973, "learning_rate": 1.3355313894337885e-05, "loss": 0.8363, "step": 2324 }, { "epoch": 0.16367476240760295, "grad_norm": 1.9259897470474243, "learning_rate": 1.3354644669394502e-05, "loss": 0.8054, "step": 2325 }, { "epoch": 0.16374516015487503, "grad_norm": 2.0841140747070312, "learning_rate": 1.3353975114067387e-05, "loss": 0.7083, "step": 2326 }, { "epoch": 0.16381555790214714, "grad_norm": 1.9425593614578247, "learning_rate": 1.335330522839135e-05, "loss": 0.7354, "step": 2327 }, { "epoch": 0.1638859556494192, "grad_norm": 1.9074454307556152, "learning_rate": 1.3352635012401224e-05, "loss": 0.7817, "step": 2328 }, { "epoch": 0.16395635339669132, "grad_norm": 1.9436166286468506, "learning_rate": 1.3351964466131845e-05, "loss": 0.7134, "step": 2329 }, { "epoch": 0.1640267511439634, "grad_norm": 2.320754289627075, "learning_rate": 1.3351293589618086e-05, "loss": 0.7886, "step": 2330 }, { "epoch": 0.16409714889123547, "grad_norm": 2.362657308578491, "learning_rate": 1.3350622382894817e-05, "loss": 0.7941, "step": 2331 }, { "epoch": 0.16416754663850758, "grad_norm": 2.320167064666748, "learning_rate": 1.3349950845996942e-05, "loss": 0.7494, "step": 2332 }, { "epoch": 0.16423794438577966, "grad_norm": 2.1466825008392334, "learning_rate": 1.334927897895937e-05, "loss": 0.8336, "step": 2333 }, { "epoch": 0.16430834213305173, "grad_norm": 2.160548448562622, "learning_rate": 1.3348606781817034e-05, "loss": 0.7543, "step": 2334 }, { "epoch": 0.16437873988032384, "grad_norm": 2.634936571121216, "learning_rate": 1.3347934254604881e-05, "loss": 0.6157, "step": 2335 }, { "epoch": 0.16444913762759591, "grad_norm": 2.275090456008911, "learning_rate": 1.3347261397357877e-05, "loss": 0.6974, "step": 2336 }, { "epoch": 0.164519535374868, "grad_norm": 2.03910756111145, "learning_rate": 1.3346588210111006e-05, "loss": 0.7872, "step": 2337 }, { "epoch": 0.1645899331221401, "grad_norm": 2.0447120666503906, "learning_rate": 1.3345914692899267e-05, "loss": 0.7645, "step": 2338 }, { "epoch": 0.16466033086941217, "grad_norm": 1.7745771408081055, "learning_rate": 1.3345240845757673e-05, "loss": 0.7502, "step": 2339 }, { "epoch": 0.16473072861668428, "grad_norm": 1.9476163387298584, "learning_rate": 1.3344566668721263e-05, "loss": 0.8383, "step": 2340 }, { "epoch": 0.16480112636395636, "grad_norm": 4.614167213439941, "learning_rate": 1.3343892161825085e-05, "loss": 0.8415, "step": 2341 }, { "epoch": 0.16487152411122843, "grad_norm": 2.2071049213409424, "learning_rate": 1.3343217325104208e-05, "loss": 0.7134, "step": 2342 }, { "epoch": 0.16494192185850054, "grad_norm": 2.145023822784424, "learning_rate": 1.3342542158593719e-05, "loss": 0.7967, "step": 2343 }, { "epoch": 0.16501231960577262, "grad_norm": 2.7442305088043213, "learning_rate": 1.3341866662328718e-05, "loss": 0.7354, "step": 2344 }, { "epoch": 0.1650827173530447, "grad_norm": 2.2328155040740967, "learning_rate": 1.3341190836344324e-05, "loss": 0.7182, "step": 2345 }, { "epoch": 0.1651531151003168, "grad_norm": 2.5931296348571777, "learning_rate": 1.3340514680675678e-05, "loss": 0.7595, "step": 2346 }, { "epoch": 0.16522351284758888, "grad_norm": 2.1896555423736572, "learning_rate": 1.333983819535793e-05, "loss": 0.7745, "step": 2347 }, { "epoch": 0.16529391059486095, "grad_norm": 2.5776174068450928, "learning_rate": 1.3339161380426253e-05, "loss": 0.7415, "step": 2348 }, { "epoch": 0.16536430834213306, "grad_norm": 2.020125150680542, "learning_rate": 1.3338484235915836e-05, "loss": 0.8, "step": 2349 }, { "epoch": 0.16543470608940514, "grad_norm": 2.0102787017822266, "learning_rate": 1.3337806761861881e-05, "loss": 0.704, "step": 2350 }, { "epoch": 0.16550510383667721, "grad_norm": 2.491095542907715, "learning_rate": 1.3337128958299613e-05, "loss": 0.7887, "step": 2351 }, { "epoch": 0.16557550158394932, "grad_norm": 1.9096992015838623, "learning_rate": 1.3336450825264272e-05, "loss": 0.7159, "step": 2352 }, { "epoch": 0.1656458993312214, "grad_norm": 2.121264696121216, "learning_rate": 1.3335772362791113e-05, "loss": 0.8381, "step": 2353 }, { "epoch": 0.1657162970784935, "grad_norm": 2.0532548427581787, "learning_rate": 1.3335093570915414e-05, "loss": 0.8544, "step": 2354 }, { "epoch": 0.16578669482576558, "grad_norm": 2.589737892150879, "learning_rate": 1.3334414449672459e-05, "loss": 0.7116, "step": 2355 }, { "epoch": 0.16585709257303766, "grad_norm": 2.145540475845337, "learning_rate": 1.333373499909756e-05, "loss": 0.6991, "step": 2356 }, { "epoch": 0.16592749032030976, "grad_norm": 2.000086784362793, "learning_rate": 1.3333055219226045e-05, "loss": 0.8552, "step": 2357 }, { "epoch": 0.16599788806758184, "grad_norm": 2.23633074760437, "learning_rate": 1.3332375110093253e-05, "loss": 0.7965, "step": 2358 }, { "epoch": 0.16606828581485392, "grad_norm": 2.246675491333008, "learning_rate": 1.3331694671734542e-05, "loss": 0.7207, "step": 2359 }, { "epoch": 0.16613868356212602, "grad_norm": 1.994308590888977, "learning_rate": 1.3331013904185291e-05, "loss": 0.7178, "step": 2360 }, { "epoch": 0.1662090813093981, "grad_norm": 2.5853710174560547, "learning_rate": 1.3330332807480896e-05, "loss": 0.6676, "step": 2361 }, { "epoch": 0.16627947905667018, "grad_norm": 1.7984718084335327, "learning_rate": 1.3329651381656762e-05, "loss": 0.7783, "step": 2362 }, { "epoch": 0.16634987680394228, "grad_norm": 2.080811023712158, "learning_rate": 1.3328969626748323e-05, "loss": 0.8192, "step": 2363 }, { "epoch": 0.16642027455121436, "grad_norm": 2.499884605407715, "learning_rate": 1.332828754279102e-05, "loss": 0.8739, "step": 2364 }, { "epoch": 0.16649067229848644, "grad_norm": 2.087714195251465, "learning_rate": 1.3327605129820316e-05, "loss": 0.7243, "step": 2365 }, { "epoch": 0.16656107004575854, "grad_norm": 1.7984980344772339, "learning_rate": 1.3326922387871692e-05, "loss": 0.757, "step": 2366 }, { "epoch": 0.16663146779303062, "grad_norm": 1.8298848867416382, "learning_rate": 1.332623931698064e-05, "loss": 0.8201, "step": 2367 }, { "epoch": 0.16670186554030272, "grad_norm": 2.3044862747192383, "learning_rate": 1.332555591718268e-05, "loss": 0.8105, "step": 2368 }, { "epoch": 0.1667722632875748, "grad_norm": 1.8338335752487183, "learning_rate": 1.3324872188513339e-05, "loss": 0.7019, "step": 2369 }, { "epoch": 0.16684266103484688, "grad_norm": 2.242663860321045, "learning_rate": 1.3324188131008164e-05, "loss": 0.824, "step": 2370 }, { "epoch": 0.16691305878211898, "grad_norm": 2.0214903354644775, "learning_rate": 1.3323503744702721e-05, "loss": 0.8109, "step": 2371 }, { "epoch": 0.16698345652939106, "grad_norm": 3.2261860370635986, "learning_rate": 1.3322819029632588e-05, "loss": 0.8081, "step": 2372 }, { "epoch": 0.16705385427666314, "grad_norm": 2.1992270946502686, "learning_rate": 1.3322133985833371e-05, "loss": 0.8043, "step": 2373 }, { "epoch": 0.16712425202393524, "grad_norm": 3.584484577178955, "learning_rate": 1.3321448613340683e-05, "loss": 0.7316, "step": 2374 }, { "epoch": 0.16719464977120732, "grad_norm": 2.423704147338867, "learning_rate": 1.3320762912190153e-05, "loss": 0.7587, "step": 2375 }, { "epoch": 0.1672650475184794, "grad_norm": 1.9866315126419067, "learning_rate": 1.3320076882417436e-05, "loss": 0.8232, "step": 2376 }, { "epoch": 0.1673354452657515, "grad_norm": 2.3284404277801514, "learning_rate": 1.33193905240582e-05, "loss": 0.7887, "step": 2377 }, { "epoch": 0.16740584301302358, "grad_norm": 1.9245001077651978, "learning_rate": 1.3318703837148125e-05, "loss": 0.725, "step": 2378 }, { "epoch": 0.16747624076029566, "grad_norm": 2.1582086086273193, "learning_rate": 1.3318016821722916e-05, "loss": 0.7719, "step": 2379 }, { "epoch": 0.16754663850756776, "grad_norm": 6.218706130981445, "learning_rate": 1.3317329477818292e-05, "loss": 0.8714, "step": 2380 }, { "epoch": 0.16761703625483984, "grad_norm": 2.1015031337738037, "learning_rate": 1.3316641805469985e-05, "loss": 0.6966, "step": 2381 }, { "epoch": 0.16768743400211195, "grad_norm": 2.3775453567504883, "learning_rate": 1.331595380471375e-05, "loss": 0.7792, "step": 2382 }, { "epoch": 0.16775783174938402, "grad_norm": 1.8784070014953613, "learning_rate": 1.3315265475585358e-05, "loss": 0.7812, "step": 2383 }, { "epoch": 0.1678282294966561, "grad_norm": 2.476595640182495, "learning_rate": 1.3314576818120593e-05, "loss": 0.8238, "step": 2384 }, { "epoch": 0.1678986272439282, "grad_norm": 1.7798161506652832, "learning_rate": 1.3313887832355261e-05, "loss": 0.7973, "step": 2385 }, { "epoch": 0.16796902499120028, "grad_norm": 2.3632750511169434, "learning_rate": 1.3313198518325183e-05, "loss": 0.6543, "step": 2386 }, { "epoch": 0.16803942273847236, "grad_norm": 1.9204093217849731, "learning_rate": 1.3312508876066196e-05, "loss": 0.7193, "step": 2387 }, { "epoch": 0.16810982048574447, "grad_norm": 2.0103089809417725, "learning_rate": 1.3311818905614157e-05, "loss": 0.8695, "step": 2388 }, { "epoch": 0.16818021823301654, "grad_norm": 6.055141925811768, "learning_rate": 1.3311128607004935e-05, "loss": 0.6861, "step": 2389 }, { "epoch": 0.16825061598028862, "grad_norm": 1.7880421876907349, "learning_rate": 1.331043798027442e-05, "loss": 0.6338, "step": 2390 }, { "epoch": 0.16832101372756073, "grad_norm": 2.1701903343200684, "learning_rate": 1.3309747025458523e-05, "loss": 0.7478, "step": 2391 }, { "epoch": 0.1683914114748328, "grad_norm": 1.8264164924621582, "learning_rate": 1.3309055742593161e-05, "loss": 0.7174, "step": 2392 }, { "epoch": 0.16846180922210488, "grad_norm": 2.307135581970215, "learning_rate": 1.3308364131714278e-05, "loss": 0.8012, "step": 2393 }, { "epoch": 0.16853220696937699, "grad_norm": 2.2471299171447754, "learning_rate": 1.330767219285783e-05, "loss": 0.7657, "step": 2394 }, { "epoch": 0.16860260471664906, "grad_norm": 1.968122959136963, "learning_rate": 1.3306979926059792e-05, "loss": 0.8152, "step": 2395 }, { "epoch": 0.16867300246392117, "grad_norm": 1.9301185607910156, "learning_rate": 1.3306287331356156e-05, "loss": 0.6932, "step": 2396 }, { "epoch": 0.16874340021119325, "grad_norm": 2.2628538608551025, "learning_rate": 1.330559440878293e-05, "loss": 0.7907, "step": 2397 }, { "epoch": 0.16881379795846532, "grad_norm": 2.432068347930908, "learning_rate": 1.3304901158376142e-05, "loss": 0.651, "step": 2398 }, { "epoch": 0.16888419570573743, "grad_norm": 2.601055383682251, "learning_rate": 1.3304207580171829e-05, "loss": 0.791, "step": 2399 }, { "epoch": 0.1689545934530095, "grad_norm": 1.9932613372802734, "learning_rate": 1.3303513674206059e-05, "loss": 0.7739, "step": 2400 }, { "epoch": 0.16902499120028158, "grad_norm": 2.4703972339630127, "learning_rate": 1.3302819440514901e-05, "loss": 0.7188, "step": 2401 }, { "epoch": 0.1690953889475537, "grad_norm": 2.1887924671173096, "learning_rate": 1.3302124879134452e-05, "loss": 0.7068, "step": 2402 }, { "epoch": 0.16916578669482576, "grad_norm": 1.9023466110229492, "learning_rate": 1.3301429990100824e-05, "loss": 0.679, "step": 2403 }, { "epoch": 0.16923618444209784, "grad_norm": 1.864626407623291, "learning_rate": 1.3300734773450144e-05, "loss": 0.7485, "step": 2404 }, { "epoch": 0.16930658218936995, "grad_norm": 1.98395836353302, "learning_rate": 1.3300039229218557e-05, "loss": 0.7816, "step": 2405 }, { "epoch": 0.16937697993664202, "grad_norm": 2.1433682441711426, "learning_rate": 1.3299343357442223e-05, "loss": 0.7344, "step": 2406 }, { "epoch": 0.1694473776839141, "grad_norm": 2.1019911766052246, "learning_rate": 1.3298647158157326e-05, "loss": 0.7512, "step": 2407 }, { "epoch": 0.1695177754311862, "grad_norm": 2.175269842147827, "learning_rate": 1.3297950631400058e-05, "loss": 0.8553, "step": 2408 }, { "epoch": 0.16958817317845828, "grad_norm": 2.8591771125793457, "learning_rate": 1.3297253777206631e-05, "loss": 0.8061, "step": 2409 }, { "epoch": 0.1696585709257304, "grad_norm": 1.923150897026062, "learning_rate": 1.3296556595613277e-05, "loss": 0.6875, "step": 2410 }, { "epoch": 0.16972896867300247, "grad_norm": 1.8999671936035156, "learning_rate": 1.3295859086656245e-05, "loss": 0.836, "step": 2411 }, { "epoch": 0.16979936642027454, "grad_norm": 2.0105485916137695, "learning_rate": 1.3295161250371797e-05, "loss": 0.7317, "step": 2412 }, { "epoch": 0.16986976416754665, "grad_norm": 2.3521924018859863, "learning_rate": 1.3294463086796213e-05, "loss": 0.7686, "step": 2413 }, { "epoch": 0.16994016191481873, "grad_norm": 2.1716041564941406, "learning_rate": 1.3293764595965795e-05, "loss": 0.7637, "step": 2414 }, { "epoch": 0.1700105596620908, "grad_norm": 1.8556751012802124, "learning_rate": 1.3293065777916854e-05, "loss": 0.7504, "step": 2415 }, { "epoch": 0.1700809574093629, "grad_norm": 1.9370185136795044, "learning_rate": 1.3292366632685724e-05, "loss": 0.7634, "step": 2416 }, { "epoch": 0.170151355156635, "grad_norm": 2.143339157104492, "learning_rate": 1.3291667160308752e-05, "loss": 0.7546, "step": 2417 }, { "epoch": 0.17022175290390706, "grad_norm": 1.679125428199768, "learning_rate": 1.3290967360822309e-05, "loss": 0.7335, "step": 2418 }, { "epoch": 0.17029215065117917, "grad_norm": 2.009000539779663, "learning_rate": 1.3290267234262776e-05, "loss": 0.7636, "step": 2419 }, { "epoch": 0.17036254839845125, "grad_norm": 2.1441385746002197, "learning_rate": 1.3289566780666551e-05, "loss": 0.719, "step": 2420 }, { "epoch": 0.17043294614572332, "grad_norm": 2.4081528186798096, "learning_rate": 1.3288866000070052e-05, "loss": 0.8279, "step": 2421 }, { "epoch": 0.17050334389299543, "grad_norm": 1.6819441318511963, "learning_rate": 1.3288164892509717e-05, "loss": 0.8701, "step": 2422 }, { "epoch": 0.1705737416402675, "grad_norm": 1.7235552072525024, "learning_rate": 1.3287463458021992e-05, "loss": 0.7003, "step": 2423 }, { "epoch": 0.1706441393875396, "grad_norm": 1.794406771659851, "learning_rate": 1.328676169664335e-05, "loss": 0.748, "step": 2424 }, { "epoch": 0.1707145371348117, "grad_norm": 2.6313130855560303, "learning_rate": 1.328605960841027e-05, "loss": 0.6794, "step": 2425 }, { "epoch": 0.17078493488208377, "grad_norm": 2.1056129932403564, "learning_rate": 1.328535719335926e-05, "loss": 0.7153, "step": 2426 }, { "epoch": 0.17085533262935587, "grad_norm": 1.8124769926071167, "learning_rate": 1.3284654451526838e-05, "loss": 0.8174, "step": 2427 }, { "epoch": 0.17092573037662795, "grad_norm": 2.2503185272216797, "learning_rate": 1.328395138294954e-05, "loss": 0.8675, "step": 2428 }, { "epoch": 0.17099612812390003, "grad_norm": 2.0855090618133545, "learning_rate": 1.3283247987663915e-05, "loss": 0.7706, "step": 2429 }, { "epoch": 0.17106652587117213, "grad_norm": 1.8309695720672607, "learning_rate": 1.3282544265706538e-05, "loss": 0.7201, "step": 2430 }, { "epoch": 0.1711369236184442, "grad_norm": 1.942726731300354, "learning_rate": 1.3281840217113996e-05, "loss": 0.7116, "step": 2431 }, { "epoch": 0.1712073213657163, "grad_norm": 1.9851869344711304, "learning_rate": 1.3281135841922892e-05, "loss": 0.8114, "step": 2432 }, { "epoch": 0.1712777191129884, "grad_norm": 1.8475958108901978, "learning_rate": 1.3280431140169847e-05, "loss": 0.7688, "step": 2433 }, { "epoch": 0.17134811686026047, "grad_norm": 1.816476821899414, "learning_rate": 1.3279726111891498e-05, "loss": 0.6536, "step": 2434 }, { "epoch": 0.17141851460753255, "grad_norm": 2.042128801345825, "learning_rate": 1.3279020757124502e-05, "loss": 0.7782, "step": 2435 }, { "epoch": 0.17148891235480465, "grad_norm": 1.9167205095291138, "learning_rate": 1.327831507590553e-05, "loss": 0.7921, "step": 2436 }, { "epoch": 0.17155931010207673, "grad_norm": 1.9378328323364258, "learning_rate": 1.327760906827127e-05, "loss": 0.6377, "step": 2437 }, { "epoch": 0.17162970784934883, "grad_norm": 2.2389066219329834, "learning_rate": 1.3276902734258432e-05, "loss": 0.737, "step": 2438 }, { "epoch": 0.1717001055966209, "grad_norm": 1.9270156621932983, "learning_rate": 1.3276196073903734e-05, "loss": 0.7465, "step": 2439 }, { "epoch": 0.171770503343893, "grad_norm": 2.0876636505126953, "learning_rate": 1.3275489087243921e-05, "loss": 0.7441, "step": 2440 }, { "epoch": 0.1718409010911651, "grad_norm": 2.8114848136901855, "learning_rate": 1.3274781774315743e-05, "loss": 0.5954, "step": 2441 }, { "epoch": 0.17191129883843717, "grad_norm": 1.7931652069091797, "learning_rate": 1.327407413515598e-05, "loss": 0.9398, "step": 2442 }, { "epoch": 0.17198169658570925, "grad_norm": 2.411499261856079, "learning_rate": 1.327336616980142e-05, "loss": 0.7173, "step": 2443 }, { "epoch": 0.17205209433298135, "grad_norm": 1.964766263961792, "learning_rate": 1.327265787828887e-05, "loss": 0.8329, "step": 2444 }, { "epoch": 0.17212249208025343, "grad_norm": 3.3048923015594482, "learning_rate": 1.3271949260655158e-05, "loss": 0.7313, "step": 2445 }, { "epoch": 0.1721928898275255, "grad_norm": 2.0728261470794678, "learning_rate": 1.3271240316937124e-05, "loss": 0.7123, "step": 2446 }, { "epoch": 0.1722632875747976, "grad_norm": 2.5477242469787598, "learning_rate": 1.3270531047171625e-05, "loss": 0.8588, "step": 2447 }, { "epoch": 0.1723336853220697, "grad_norm": 2.1862668991088867, "learning_rate": 1.3269821451395539e-05, "loss": 0.7753, "step": 2448 }, { "epoch": 0.17240408306934177, "grad_norm": 2.038264513015747, "learning_rate": 1.3269111529645756e-05, "loss": 0.8689, "step": 2449 }, { "epoch": 0.17247448081661387, "grad_norm": 2.775592803955078, "learning_rate": 1.3268401281959187e-05, "loss": 0.6616, "step": 2450 }, { "epoch": 0.17254487856388595, "grad_norm": 2.0679690837860107, "learning_rate": 1.326769070837276e-05, "loss": 0.5828, "step": 2451 }, { "epoch": 0.17261527631115806, "grad_norm": 2.33280086517334, "learning_rate": 1.3266979808923416e-05, "loss": 0.6991, "step": 2452 }, { "epoch": 0.17268567405843013, "grad_norm": 1.7909483909606934, "learning_rate": 1.3266268583648114e-05, "loss": 0.6901, "step": 2453 }, { "epoch": 0.1727560718057022, "grad_norm": 1.9619282484054565, "learning_rate": 1.3265557032583836e-05, "loss": 0.8418, "step": 2454 }, { "epoch": 0.17282646955297432, "grad_norm": 1.8085087537765503, "learning_rate": 1.3264845155767572e-05, "loss": 0.891, "step": 2455 }, { "epoch": 0.1728968673002464, "grad_norm": 2.3299672603607178, "learning_rate": 1.3264132953236335e-05, "loss": 0.7147, "step": 2456 }, { "epoch": 0.17296726504751847, "grad_norm": 1.9719486236572266, "learning_rate": 1.3263420425027154e-05, "loss": 0.5457, "step": 2457 }, { "epoch": 0.17303766279479058, "grad_norm": 2.506456136703491, "learning_rate": 1.3262707571177071e-05, "loss": 0.8225, "step": 2458 }, { "epoch": 0.17310806054206265, "grad_norm": 2.1667258739471436, "learning_rate": 1.3261994391723151e-05, "loss": 0.6646, "step": 2459 }, { "epoch": 0.17317845828933473, "grad_norm": 2.641432762145996, "learning_rate": 1.326128088670247e-05, "loss": 0.8489, "step": 2460 }, { "epoch": 0.17324885603660684, "grad_norm": 2.1061830520629883, "learning_rate": 1.3260567056152126e-05, "loss": 0.9063, "step": 2461 }, { "epoch": 0.1733192537838789, "grad_norm": 2.0071802139282227, "learning_rate": 1.325985290010923e-05, "loss": 0.8555, "step": 2462 }, { "epoch": 0.173389651531151, "grad_norm": 2.090104579925537, "learning_rate": 1.3259138418610912e-05, "loss": 0.7322, "step": 2463 }, { "epoch": 0.1734600492784231, "grad_norm": 1.9993388652801514, "learning_rate": 1.325842361169432e-05, "loss": 0.7575, "step": 2464 }, { "epoch": 0.17353044702569517, "grad_norm": 2.194506883621216, "learning_rate": 1.3257708479396616e-05, "loss": 0.7638, "step": 2465 }, { "epoch": 0.17360084477296728, "grad_norm": 1.8301949501037598, "learning_rate": 1.3256993021754982e-05, "loss": 0.7987, "step": 2466 }, { "epoch": 0.17367124252023936, "grad_norm": 2.2814159393310547, "learning_rate": 1.3256277238806614e-05, "loss": 0.7377, "step": 2467 }, { "epoch": 0.17374164026751143, "grad_norm": 2.2343010902404785, "learning_rate": 1.3255561130588725e-05, "loss": 0.7746, "step": 2468 }, { "epoch": 0.17381203801478354, "grad_norm": 2.029341697692871, "learning_rate": 1.3254844697138545e-05, "loss": 0.7983, "step": 2469 }, { "epoch": 0.17388243576205561, "grad_norm": 1.923988699913025, "learning_rate": 1.325412793849333e-05, "loss": 0.6752, "step": 2470 }, { "epoch": 0.1739528335093277, "grad_norm": 2.6908857822418213, "learning_rate": 1.3253410854690335e-05, "loss": 0.7238, "step": 2471 }, { "epoch": 0.1740232312565998, "grad_norm": 2.0780928134918213, "learning_rate": 1.3252693445766849e-05, "loss": 0.8495, "step": 2472 }, { "epoch": 0.17409362900387187, "grad_norm": 1.9729321002960205, "learning_rate": 1.3251975711760167e-05, "loss": 0.6927, "step": 2473 }, { "epoch": 0.17416402675114395, "grad_norm": 2.0974180698394775, "learning_rate": 1.3251257652707608e-05, "loss": 0.5977, "step": 2474 }, { "epoch": 0.17423442449841606, "grad_norm": 2.371176242828369, "learning_rate": 1.3250539268646499e-05, "loss": 0.8417, "step": 2475 }, { "epoch": 0.17430482224568813, "grad_norm": 2.5941402912139893, "learning_rate": 1.3249820559614192e-05, "loss": 0.9982, "step": 2476 }, { "epoch": 0.1743752199929602, "grad_norm": 2.5652480125427246, "learning_rate": 1.3249101525648057e-05, "loss": 0.7126, "step": 2477 }, { "epoch": 0.17444561774023232, "grad_norm": 2.2121517658233643, "learning_rate": 1.3248382166785472e-05, "loss": 0.7382, "step": 2478 }, { "epoch": 0.1745160154875044, "grad_norm": 2.478785276412964, "learning_rate": 1.324766248306384e-05, "loss": 0.7456, "step": 2479 }, { "epoch": 0.1745864132347765, "grad_norm": 3.6467998027801514, "learning_rate": 1.3246942474520576e-05, "loss": 0.6537, "step": 2480 }, { "epoch": 0.17465681098204858, "grad_norm": 2.4121546745300293, "learning_rate": 1.3246222141193117e-05, "loss": 0.7841, "step": 2481 }, { "epoch": 0.17472720872932065, "grad_norm": 1.8438323736190796, "learning_rate": 1.324550148311891e-05, "loss": 0.7528, "step": 2482 }, { "epoch": 0.17479760647659276, "grad_norm": 2.4135732650756836, "learning_rate": 1.3244780500335426e-05, "loss": 0.8292, "step": 2483 }, { "epoch": 0.17486800422386484, "grad_norm": 2.034721612930298, "learning_rate": 1.3244059192880148e-05, "loss": 0.8306, "step": 2484 }, { "epoch": 0.17493840197113691, "grad_norm": 2.128718614578247, "learning_rate": 1.3243337560790579e-05, "loss": 0.6647, "step": 2485 }, { "epoch": 0.17500879971840902, "grad_norm": 2.137626886367798, "learning_rate": 1.3242615604104235e-05, "loss": 0.728, "step": 2486 }, { "epoch": 0.1750791974656811, "grad_norm": 2.1954214572906494, "learning_rate": 1.3241893322858651e-05, "loss": 0.6652, "step": 2487 }, { "epoch": 0.17514959521295317, "grad_norm": 2.067700147628784, "learning_rate": 1.324117071709138e-05, "loss": 0.7745, "step": 2488 }, { "epoch": 0.17521999296022528, "grad_norm": 2.0748746395111084, "learning_rate": 1.3240447786839994e-05, "loss": 0.8401, "step": 2489 }, { "epoch": 0.17529039070749736, "grad_norm": 3.0510470867156982, "learning_rate": 1.3239724532142073e-05, "loss": 0.6587, "step": 2490 }, { "epoch": 0.17536078845476943, "grad_norm": 2.399211883544922, "learning_rate": 1.3239000953035224e-05, "loss": 0.7293, "step": 2491 }, { "epoch": 0.17543118620204154, "grad_norm": 2.3822696208953857, "learning_rate": 1.3238277049557064e-05, "loss": 0.8812, "step": 2492 }, { "epoch": 0.17550158394931362, "grad_norm": 2.2642223834991455, "learning_rate": 1.323755282174523e-05, "loss": 0.811, "step": 2493 }, { "epoch": 0.17557198169658572, "grad_norm": 2.1361095905303955, "learning_rate": 1.3236828269637377e-05, "loss": 0.8391, "step": 2494 }, { "epoch": 0.1756423794438578, "grad_norm": 2.250175714492798, "learning_rate": 1.3236103393271175e-05, "loss": 0.797, "step": 2495 }, { "epoch": 0.17571277719112988, "grad_norm": 2.177765369415283, "learning_rate": 1.3235378192684309e-05, "loss": 0.6579, "step": 2496 }, { "epoch": 0.17578317493840198, "grad_norm": 2.17628812789917, "learning_rate": 1.3234652667914482e-05, "loss": 0.7547, "step": 2497 }, { "epoch": 0.17585357268567406, "grad_norm": 2.1396942138671875, "learning_rate": 1.3233926818999416e-05, "loss": 0.7059, "step": 2498 }, { "epoch": 0.17592397043294614, "grad_norm": 1.8798319101333618, "learning_rate": 1.323320064597685e-05, "loss": 0.6737, "step": 2499 }, { "epoch": 0.17599436818021824, "grad_norm": 2.8714842796325684, "learning_rate": 1.323247414888454e-05, "loss": 0.6814, "step": 2500 }, { "epoch": 0.17606476592749032, "grad_norm": 2.4333648681640625, "learning_rate": 1.3231747327760252e-05, "loss": 0.8282, "step": 2501 }, { "epoch": 0.1761351636747624, "grad_norm": 2.19931697845459, "learning_rate": 1.3231020182641776e-05, "loss": 0.7011, "step": 2502 }, { "epoch": 0.1762055614220345, "grad_norm": 2.0979816913604736, "learning_rate": 1.3230292713566919e-05, "loss": 0.7723, "step": 2503 }, { "epoch": 0.17627595916930658, "grad_norm": 1.8135102987289429, "learning_rate": 1.3229564920573499e-05, "loss": 0.72, "step": 2504 }, { "epoch": 0.17634635691657866, "grad_norm": 2.125945568084717, "learning_rate": 1.322883680369936e-05, "loss": 0.7322, "step": 2505 }, { "epoch": 0.17641675466385076, "grad_norm": 2.1808784008026123, "learning_rate": 1.3228108362982352e-05, "loss": 0.8914, "step": 2506 }, { "epoch": 0.17648715241112284, "grad_norm": 2.2015037536621094, "learning_rate": 1.322737959846035e-05, "loss": 0.7146, "step": 2507 }, { "epoch": 0.17655755015839494, "grad_norm": 2.0767343044281006, "learning_rate": 1.3226650510171244e-05, "loss": 0.7698, "step": 2508 }, { "epoch": 0.17662794790566702, "grad_norm": 1.9536254405975342, "learning_rate": 1.322592109815294e-05, "loss": 0.6925, "step": 2509 }, { "epoch": 0.1766983456529391, "grad_norm": 1.7571136951446533, "learning_rate": 1.3225191362443357e-05, "loss": 0.6414, "step": 2510 }, { "epoch": 0.1767687434002112, "grad_norm": 3.1137123107910156, "learning_rate": 1.322446130308044e-05, "loss": 0.5837, "step": 2511 }, { "epoch": 0.17683914114748328, "grad_norm": 2.6278882026672363, "learning_rate": 1.322373092010214e-05, "loss": 0.798, "step": 2512 }, { "epoch": 0.17690953889475536, "grad_norm": 3.15384578704834, "learning_rate": 1.3223000213546434e-05, "loss": 0.919, "step": 2513 }, { "epoch": 0.17697993664202746, "grad_norm": 2.062216281890869, "learning_rate": 1.322226918345131e-05, "loss": 0.6808, "step": 2514 }, { "epoch": 0.17705033438929954, "grad_norm": 2.1357858180999756, "learning_rate": 1.3221537829854777e-05, "loss": 0.7989, "step": 2515 }, { "epoch": 0.17712073213657162, "grad_norm": 2.148599624633789, "learning_rate": 1.3220806152794856e-05, "loss": 0.7456, "step": 2516 }, { "epoch": 0.17719112988384372, "grad_norm": 2.3659207820892334, "learning_rate": 1.3220074152309592e-05, "loss": 0.6702, "step": 2517 }, { "epoch": 0.1772615276311158, "grad_norm": 4.222046852111816, "learning_rate": 1.3219341828437038e-05, "loss": 0.8006, "step": 2518 }, { "epoch": 0.17733192537838788, "grad_norm": 1.9350900650024414, "learning_rate": 1.321860918121527e-05, "loss": 0.7844, "step": 2519 }, { "epoch": 0.17740232312565998, "grad_norm": 1.9199578762054443, "learning_rate": 1.3217876210682378e-05, "loss": 0.8074, "step": 2520 }, { "epoch": 0.17747272087293206, "grad_norm": 2.167600631713867, "learning_rate": 1.3217142916876471e-05, "loss": 0.7737, "step": 2521 }, { "epoch": 0.17754311862020417, "grad_norm": 1.9414867162704468, "learning_rate": 1.3216409299835674e-05, "loss": 0.6745, "step": 2522 }, { "epoch": 0.17761351636747624, "grad_norm": 2.4856436252593994, "learning_rate": 1.3215675359598127e-05, "loss": 0.7607, "step": 2523 }, { "epoch": 0.17768391411474832, "grad_norm": 2.0950028896331787, "learning_rate": 1.3214941096201987e-05, "loss": 0.7793, "step": 2524 }, { "epoch": 0.17775431186202043, "grad_norm": 1.9961518049240112, "learning_rate": 1.3214206509685435e-05, "loss": 0.7826, "step": 2525 }, { "epoch": 0.1778247096092925, "grad_norm": 2.208261489868164, "learning_rate": 1.3213471600086655e-05, "loss": 0.6925, "step": 2526 }, { "epoch": 0.17789510735656458, "grad_norm": 2.2063775062561035, "learning_rate": 1.321273636744386e-05, "loss": 0.7345, "step": 2527 }, { "epoch": 0.17796550510383669, "grad_norm": 2.2562854290008545, "learning_rate": 1.3212000811795275e-05, "loss": 0.7439, "step": 2528 }, { "epoch": 0.17803590285110876, "grad_norm": 2.583740472793579, "learning_rate": 1.3211264933179144e-05, "loss": 0.6826, "step": 2529 }, { "epoch": 0.17810630059838084, "grad_norm": 2.054724931716919, "learning_rate": 1.321052873163372e-05, "loss": 0.7054, "step": 2530 }, { "epoch": 0.17817669834565295, "grad_norm": 2.1711981296539307, "learning_rate": 1.3209792207197286e-05, "loss": 0.8167, "step": 2531 }, { "epoch": 0.17824709609292502, "grad_norm": 2.4675133228302, "learning_rate": 1.320905535990813e-05, "loss": 0.7572, "step": 2532 }, { "epoch": 0.1783174938401971, "grad_norm": 2.1428332328796387, "learning_rate": 1.3208318189804562e-05, "loss": 0.8082, "step": 2533 }, { "epoch": 0.1783878915874692, "grad_norm": 2.0781171321868896, "learning_rate": 1.3207580696924908e-05, "loss": 0.8688, "step": 2534 }, { "epoch": 0.17845828933474128, "grad_norm": 2.3209595680236816, "learning_rate": 1.3206842881307512e-05, "loss": 0.7106, "step": 2535 }, { "epoch": 0.1785286870820134, "grad_norm": 2.0255346298217773, "learning_rate": 1.3206104742990736e-05, "loss": 0.6834, "step": 2536 }, { "epoch": 0.17859908482928546, "grad_norm": 1.9358389377593994, "learning_rate": 1.3205366282012951e-05, "loss": 0.6892, "step": 2537 }, { "epoch": 0.17866948257655754, "grad_norm": 2.129214286804199, "learning_rate": 1.3204627498412554e-05, "loss": 0.8404, "step": 2538 }, { "epoch": 0.17873988032382965, "grad_norm": 2.8582489490509033, "learning_rate": 1.3203888392227955e-05, "loss": 0.7346, "step": 2539 }, { "epoch": 0.17881027807110172, "grad_norm": 2.430223226547241, "learning_rate": 1.3203148963497579e-05, "loss": 0.6652, "step": 2540 }, { "epoch": 0.1788806758183738, "grad_norm": 1.9675344228744507, "learning_rate": 1.320240921225987e-05, "loss": 0.7822, "step": 2541 }, { "epoch": 0.1789510735656459, "grad_norm": 2.137423276901245, "learning_rate": 1.3201669138553292e-05, "loss": 0.6974, "step": 2542 }, { "epoch": 0.17902147131291798, "grad_norm": 1.893980860710144, "learning_rate": 1.3200928742416315e-05, "loss": 0.7542, "step": 2543 }, { "epoch": 0.17909186906019006, "grad_norm": 2.354923963546753, "learning_rate": 1.3200188023887439e-05, "loss": 0.7338, "step": 2544 }, { "epoch": 0.17916226680746217, "grad_norm": 1.849745512008667, "learning_rate": 1.319944698300517e-05, "loss": 0.7795, "step": 2545 }, { "epoch": 0.17923266455473424, "grad_norm": 1.9921903610229492, "learning_rate": 1.319870561980804e-05, "loss": 0.7453, "step": 2546 }, { "epoch": 0.17930306230200635, "grad_norm": 1.9598453044891357, "learning_rate": 1.319796393433459e-05, "loss": 0.7604, "step": 2547 }, { "epoch": 0.17937346004927843, "grad_norm": 2.1843385696411133, "learning_rate": 1.3197221926623382e-05, "loss": 0.7205, "step": 2548 }, { "epoch": 0.1794438577965505, "grad_norm": 2.0145952701568604, "learning_rate": 1.3196479596712992e-05, "loss": 0.6624, "step": 2549 }, { "epoch": 0.1795142555438226, "grad_norm": 2.1916537284851074, "learning_rate": 1.319573694464202e-05, "loss": 0.6388, "step": 2550 }, { "epoch": 0.1795846532910947, "grad_norm": 2.240485906600952, "learning_rate": 1.3194993970449069e-05, "loss": 0.712, "step": 2551 }, { "epoch": 0.17965505103836676, "grad_norm": 2.299838066101074, "learning_rate": 1.3194250674172772e-05, "loss": 0.8257, "step": 2552 }, { "epoch": 0.17972544878563887, "grad_norm": 2.335177183151245, "learning_rate": 1.3193507055851774e-05, "loss": 0.7383, "step": 2553 }, { "epoch": 0.17979584653291095, "grad_norm": 2.0117366313934326, "learning_rate": 1.3192763115524735e-05, "loss": 0.773, "step": 2554 }, { "epoch": 0.17986624428018302, "grad_norm": 2.2581064701080322, "learning_rate": 1.3192018853230331e-05, "loss": 0.673, "step": 2555 }, { "epoch": 0.17993664202745513, "grad_norm": 2.201690435409546, "learning_rate": 1.319127426900726e-05, "loss": 0.7576, "step": 2556 }, { "epoch": 0.1800070397747272, "grad_norm": 2.33482027053833, "learning_rate": 1.3190529362894234e-05, "loss": 0.8834, "step": 2557 }, { "epoch": 0.18007743752199928, "grad_norm": 2.5799663066864014, "learning_rate": 1.3189784134929978e-05, "loss": 0.781, "step": 2558 }, { "epoch": 0.1801478352692714, "grad_norm": 2.4728870391845703, "learning_rate": 1.3189038585153241e-05, "loss": 0.7747, "step": 2559 }, { "epoch": 0.18021823301654347, "grad_norm": 2.7329437732696533, "learning_rate": 1.3188292713602781e-05, "loss": 0.627, "step": 2560 }, { "epoch": 0.18028863076381557, "grad_norm": 1.8933359384536743, "learning_rate": 1.3187546520317379e-05, "loss": 0.6934, "step": 2561 }, { "epoch": 0.18035902851108765, "grad_norm": 2.2462096214294434, "learning_rate": 1.3186800005335829e-05, "loss": 0.7039, "step": 2562 }, { "epoch": 0.18042942625835973, "grad_norm": 2.419424057006836, "learning_rate": 1.3186053168696946e-05, "loss": 0.808, "step": 2563 }, { "epoch": 0.18049982400563183, "grad_norm": 2.152305841445923, "learning_rate": 1.3185306010439553e-05, "loss": 0.7456, "step": 2564 }, { "epoch": 0.1805702217529039, "grad_norm": 1.9930144548416138, "learning_rate": 1.3184558530602501e-05, "loss": 0.6791, "step": 2565 }, { "epoch": 0.180640619500176, "grad_norm": 2.981900691986084, "learning_rate": 1.318381072922465e-05, "loss": 0.7138, "step": 2566 }, { "epoch": 0.1807110172474481, "grad_norm": 2.074373960494995, "learning_rate": 1.3183062606344878e-05, "loss": 0.79, "step": 2567 }, { "epoch": 0.18078141499472017, "grad_norm": 1.9160135984420776, "learning_rate": 1.318231416200208e-05, "loss": 0.6882, "step": 2568 }, { "epoch": 0.18085181274199225, "grad_norm": 1.7908340692520142, "learning_rate": 1.3181565396235172e-05, "loss": 0.7496, "step": 2569 }, { "epoch": 0.18092221048926435, "grad_norm": 2.3006176948547363, "learning_rate": 1.318081630908308e-05, "loss": 0.7614, "step": 2570 }, { "epoch": 0.18099260823653643, "grad_norm": 2.047255277633667, "learning_rate": 1.3180066900584752e-05, "loss": 0.7341, "step": 2571 }, { "epoch": 0.1810630059838085, "grad_norm": 3.022780656814575, "learning_rate": 1.3179317170779146e-05, "loss": 0.777, "step": 2572 }, { "epoch": 0.1811334037310806, "grad_norm": 2.029799461364746, "learning_rate": 1.3178567119705247e-05, "loss": 0.736, "step": 2573 }, { "epoch": 0.1812038014783527, "grad_norm": 1.9279718399047852, "learning_rate": 1.3177816747402043e-05, "loss": 0.7287, "step": 2574 }, { "epoch": 0.1812741992256248, "grad_norm": 2.5501017570495605, "learning_rate": 1.3177066053908556e-05, "loss": 0.8234, "step": 2575 }, { "epoch": 0.18134459697289687, "grad_norm": 2.23816180229187, "learning_rate": 1.3176315039263808e-05, "loss": 0.7087, "step": 2576 }, { "epoch": 0.18141499472016895, "grad_norm": 2.1780097484588623, "learning_rate": 1.3175563703506848e-05, "loss": 0.8233, "step": 2577 }, { "epoch": 0.18148539246744105, "grad_norm": 1.982742428779602, "learning_rate": 1.3174812046676739e-05, "loss": 0.7138, "step": 2578 }, { "epoch": 0.18155579021471313, "grad_norm": 2.1223337650299072, "learning_rate": 1.3174060068812557e-05, "loss": 0.7816, "step": 2579 }, { "epoch": 0.1816261879619852, "grad_norm": 2.0995934009552, "learning_rate": 1.3173307769953404e-05, "loss": 0.8094, "step": 2580 }, { "epoch": 0.1816965857092573, "grad_norm": 2.844238519668579, "learning_rate": 1.3172555150138387e-05, "loss": 0.7193, "step": 2581 }, { "epoch": 0.1817669834565294, "grad_norm": 2.3782155513763428, "learning_rate": 1.3171802209406638e-05, "loss": 0.7014, "step": 2582 }, { "epoch": 0.18183738120380147, "grad_norm": 2.1788549423217773, "learning_rate": 1.3171048947797302e-05, "loss": 0.7811, "step": 2583 }, { "epoch": 0.18190777895107357, "grad_norm": 2.029998302459717, "learning_rate": 1.3170295365349545e-05, "loss": 0.8022, "step": 2584 }, { "epoch": 0.18197817669834565, "grad_norm": 2.033193349838257, "learning_rate": 1.3169541462102542e-05, "loss": 0.8016, "step": 2585 }, { "epoch": 0.18204857444561773, "grad_norm": 1.821736216545105, "learning_rate": 1.3168787238095489e-05, "loss": 0.7834, "step": 2586 }, { "epoch": 0.18211897219288983, "grad_norm": 2.1797523498535156, "learning_rate": 1.3168032693367605e-05, "loss": 0.7251, "step": 2587 }, { "epoch": 0.1821893699401619, "grad_norm": 2.5700201988220215, "learning_rate": 1.3167277827958111e-05, "loss": 0.8391, "step": 2588 }, { "epoch": 0.18225976768743402, "grad_norm": 2.2453713417053223, "learning_rate": 1.3166522641906259e-05, "loss": 0.7971, "step": 2589 }, { "epoch": 0.1823301654347061, "grad_norm": 2.3246560096740723, "learning_rate": 1.316576713525131e-05, "loss": 0.7793, "step": 2590 }, { "epoch": 0.18240056318197817, "grad_norm": 2.135981798171997, "learning_rate": 1.3165011308032544e-05, "loss": 0.783, "step": 2591 }, { "epoch": 0.18247096092925028, "grad_norm": 2.7227280139923096, "learning_rate": 1.3164255160289256e-05, "loss": 0.6865, "step": 2592 }, { "epoch": 0.18254135867652235, "grad_norm": 5.095438003540039, "learning_rate": 1.3163498692060761e-05, "loss": 0.7009, "step": 2593 }, { "epoch": 0.18261175642379443, "grad_norm": 1.8484550714492798, "learning_rate": 1.3162741903386387e-05, "loss": 0.6173, "step": 2594 }, { "epoch": 0.18268215417106654, "grad_norm": 2.05271053314209, "learning_rate": 1.3161984794305478e-05, "loss": 0.7753, "step": 2595 }, { "epoch": 0.1827525519183386, "grad_norm": 2.1424920558929443, "learning_rate": 1.3161227364857402e-05, "loss": 0.7265, "step": 2596 }, { "epoch": 0.1828229496656107, "grad_norm": 1.983299732208252, "learning_rate": 1.3160469615081532e-05, "loss": 0.7524, "step": 2597 }, { "epoch": 0.1828933474128828, "grad_norm": 2.0714409351348877, "learning_rate": 1.3159711545017269e-05, "loss": 0.904, "step": 2598 }, { "epoch": 0.18296374516015487, "grad_norm": 2.088263988494873, "learning_rate": 1.3158953154704024e-05, "loss": 0.731, "step": 2599 }, { "epoch": 0.18303414290742695, "grad_norm": 3.235067129135132, "learning_rate": 1.3158194444181227e-05, "loss": 0.6709, "step": 2600 }, { "epoch": 0.18310454065469906, "grad_norm": 2.321969509124756, "learning_rate": 1.3157435413488323e-05, "loss": 0.8552, "step": 2601 }, { "epoch": 0.18317493840197113, "grad_norm": 2.177772283554077, "learning_rate": 1.3156676062664776e-05, "loss": 0.7059, "step": 2602 }, { "epoch": 0.18324533614924324, "grad_norm": 2.129748582839966, "learning_rate": 1.3155916391750064e-05, "loss": 0.834, "step": 2603 }, { "epoch": 0.18331573389651531, "grad_norm": 2.4029152393341064, "learning_rate": 1.3155156400783683e-05, "loss": 0.7735, "step": 2604 }, { "epoch": 0.1833861316437874, "grad_norm": 2.11425518989563, "learning_rate": 1.3154396089805147e-05, "loss": 0.9012, "step": 2605 }, { "epoch": 0.1834565293910595, "grad_norm": 1.973497748374939, "learning_rate": 1.3153635458853986e-05, "loss": 0.7238, "step": 2606 }, { "epoch": 0.18352692713833157, "grad_norm": 1.997349739074707, "learning_rate": 1.3152874507969744e-05, "loss": 0.7601, "step": 2607 }, { "epoch": 0.18359732488560365, "grad_norm": 3.2346019744873047, "learning_rate": 1.3152113237191983e-05, "loss": 0.7174, "step": 2608 }, { "epoch": 0.18366772263287576, "grad_norm": 2.3054420948028564, "learning_rate": 1.3151351646560284e-05, "loss": 0.9005, "step": 2609 }, { "epoch": 0.18373812038014783, "grad_norm": 2.045632839202881, "learning_rate": 1.3150589736114241e-05, "loss": 0.7871, "step": 2610 }, { "epoch": 0.1838085181274199, "grad_norm": 2.4561915397644043, "learning_rate": 1.3149827505893466e-05, "loss": 0.9326, "step": 2611 }, { "epoch": 0.18387891587469202, "grad_norm": 1.83491849899292, "learning_rate": 1.3149064955937592e-05, "loss": 0.6787, "step": 2612 }, { "epoch": 0.1839493136219641, "grad_norm": 2.106222152709961, "learning_rate": 1.3148302086286262e-05, "loss": 0.7202, "step": 2613 }, { "epoch": 0.18401971136923617, "grad_norm": 2.0969901084899902, "learning_rate": 1.3147538896979137e-05, "loss": 0.8073, "step": 2614 }, { "epoch": 0.18409010911650828, "grad_norm": 1.9219626188278198, "learning_rate": 1.3146775388055898e-05, "loss": 0.7487, "step": 2615 }, { "epoch": 0.18416050686378035, "grad_norm": 2.249863386154175, "learning_rate": 1.3146011559556239e-05, "loss": 0.7276, "step": 2616 }, { "epoch": 0.18423090461105246, "grad_norm": 2.390856981277466, "learning_rate": 1.3145247411519872e-05, "loss": 0.8626, "step": 2617 }, { "epoch": 0.18430130235832454, "grad_norm": 2.309621810913086, "learning_rate": 1.3144482943986527e-05, "loss": 0.712, "step": 2618 }, { "epoch": 0.18437170010559661, "grad_norm": 1.9140723943710327, "learning_rate": 1.3143718156995951e-05, "loss": 0.8127, "step": 2619 }, { "epoch": 0.18444209785286872, "grad_norm": 2.037682294845581, "learning_rate": 1.3142953050587903e-05, "loss": 0.7239, "step": 2620 }, { "epoch": 0.1845124956001408, "grad_norm": 2.2149691581726074, "learning_rate": 1.3142187624802163e-05, "loss": 0.8076, "step": 2621 }, { "epoch": 0.18458289334741287, "grad_norm": 1.9748437404632568, "learning_rate": 1.3141421879678524e-05, "loss": 0.7257, "step": 2622 }, { "epoch": 0.18465329109468498, "grad_norm": 2.719993829727173, "learning_rate": 1.3140655815256799e-05, "loss": 0.8001, "step": 2623 }, { "epoch": 0.18472368884195706, "grad_norm": 2.2795767784118652, "learning_rate": 1.313988943157682e-05, "loss": 0.7066, "step": 2624 }, { "epoch": 0.18479408658922913, "grad_norm": 1.9006603956222534, "learning_rate": 1.3139122728678427e-05, "loss": 0.7645, "step": 2625 }, { "epoch": 0.18486448433650124, "grad_norm": 2.548281669616699, "learning_rate": 1.3138355706601484e-05, "loss": 0.8719, "step": 2626 }, { "epoch": 0.18493488208377332, "grad_norm": 2.221097469329834, "learning_rate": 1.3137588365385866e-05, "loss": 0.7694, "step": 2627 }, { "epoch": 0.1850052798310454, "grad_norm": 2.1171281337738037, "learning_rate": 1.3136820705071473e-05, "loss": 0.8129, "step": 2628 }, { "epoch": 0.1850756775783175, "grad_norm": 1.8406339883804321, "learning_rate": 1.3136052725698213e-05, "loss": 0.7661, "step": 2629 }, { "epoch": 0.18514607532558958, "grad_norm": 1.972618818283081, "learning_rate": 1.3135284427306015e-05, "loss": 0.827, "step": 2630 }, { "epoch": 0.18521647307286168, "grad_norm": 2.441446542739868, "learning_rate": 1.3134515809934822e-05, "loss": 0.6671, "step": 2631 }, { "epoch": 0.18528687082013376, "grad_norm": 2.2793939113616943, "learning_rate": 1.3133746873624598e-05, "loss": 0.7872, "step": 2632 }, { "epoch": 0.18535726856740584, "grad_norm": 2.352151393890381, "learning_rate": 1.3132977618415317e-05, "loss": 0.8278, "step": 2633 }, { "epoch": 0.18542766631467794, "grad_norm": 2.3360612392425537, "learning_rate": 1.3132208044346977e-05, "loss": 0.7176, "step": 2634 }, { "epoch": 0.18549806406195002, "grad_norm": 2.3379364013671875, "learning_rate": 1.3131438151459588e-05, "loss": 0.7513, "step": 2635 }, { "epoch": 0.1855684618092221, "grad_norm": 2.258195400238037, "learning_rate": 1.3130667939793175e-05, "loss": 0.7619, "step": 2636 }, { "epoch": 0.1856388595564942, "grad_norm": 2.1863763332366943, "learning_rate": 1.3129897409387782e-05, "loss": 0.7743, "step": 2637 }, { "epoch": 0.18570925730376628, "grad_norm": 2.183056354522705, "learning_rate": 1.3129126560283472e-05, "loss": 0.8822, "step": 2638 }, { "epoch": 0.18577965505103836, "grad_norm": 2.192319631576538, "learning_rate": 1.3128355392520324e-05, "loss": 0.8645, "step": 2639 }, { "epoch": 0.18585005279831046, "grad_norm": 2.2728517055511475, "learning_rate": 1.3127583906138427e-05, "loss": 0.8398, "step": 2640 }, { "epoch": 0.18592045054558254, "grad_norm": 2.1914913654327393, "learning_rate": 1.3126812101177893e-05, "loss": 0.6859, "step": 2641 }, { "epoch": 0.18599084829285462, "grad_norm": 2.056480884552002, "learning_rate": 1.312603997767885e-05, "loss": 0.9418, "step": 2642 }, { "epoch": 0.18606124604012672, "grad_norm": 2.1114909648895264, "learning_rate": 1.312526753568144e-05, "loss": 0.8326, "step": 2643 }, { "epoch": 0.1861316437873988, "grad_norm": 2.1123087406158447, "learning_rate": 1.3124494775225822e-05, "loss": 0.7574, "step": 2644 }, { "epoch": 0.1862020415346709, "grad_norm": 2.235158920288086, "learning_rate": 1.3123721696352178e-05, "loss": 0.7829, "step": 2645 }, { "epoch": 0.18627243928194298, "grad_norm": 2.049412727355957, "learning_rate": 1.3122948299100692e-05, "loss": 0.8151, "step": 2646 }, { "epoch": 0.18634283702921506, "grad_norm": 1.8408353328704834, "learning_rate": 1.3122174583511582e-05, "loss": 0.7386, "step": 2647 }, { "epoch": 0.18641323477648716, "grad_norm": 1.750189185142517, "learning_rate": 1.312140054962507e-05, "loss": 0.8086, "step": 2648 }, { "epoch": 0.18648363252375924, "grad_norm": 1.9384422302246094, "learning_rate": 1.3120626197481399e-05, "loss": 0.7143, "step": 2649 }, { "epoch": 0.18655403027103132, "grad_norm": 2.1857073307037354, "learning_rate": 1.3119851527120828e-05, "loss": 0.7816, "step": 2650 }, { "epoch": 0.18662442801830342, "grad_norm": 2.556408405303955, "learning_rate": 1.3119076538583635e-05, "loss": 0.8768, "step": 2651 }, { "epoch": 0.1866948257655755, "grad_norm": 2.268882989883423, "learning_rate": 1.3118301231910112e-05, "loss": 0.7445, "step": 2652 }, { "epoch": 0.18676522351284758, "grad_norm": 2.4905362129211426, "learning_rate": 1.3117525607140565e-05, "loss": 0.6893, "step": 2653 }, { "epoch": 0.18683562126011968, "grad_norm": 2.2609946727752686, "learning_rate": 1.3116749664315323e-05, "loss": 0.758, "step": 2654 }, { "epoch": 0.18690601900739176, "grad_norm": 2.7874274253845215, "learning_rate": 1.3115973403474724e-05, "loss": 0.6835, "step": 2655 }, { "epoch": 0.18697641675466384, "grad_norm": 2.0392112731933594, "learning_rate": 1.3115196824659131e-05, "loss": 0.8259, "step": 2656 }, { "epoch": 0.18704681450193594, "grad_norm": 2.1472535133361816, "learning_rate": 1.3114419927908916e-05, "loss": 0.8148, "step": 2657 }, { "epoch": 0.18711721224920802, "grad_norm": 1.9952208995819092, "learning_rate": 1.3113642713264472e-05, "loss": 0.7473, "step": 2658 }, { "epoch": 0.18718760999648013, "grad_norm": 1.7890840768814087, "learning_rate": 1.3112865180766206e-05, "loss": 0.7515, "step": 2659 }, { "epoch": 0.1872580077437522, "grad_norm": 2.200063467025757, "learning_rate": 1.3112087330454543e-05, "loss": 0.7176, "step": 2660 }, { "epoch": 0.18732840549102428, "grad_norm": 1.8022172451019287, "learning_rate": 1.3111309162369923e-05, "loss": 0.6825, "step": 2661 }, { "epoch": 0.18739880323829639, "grad_norm": 1.8918417692184448, "learning_rate": 1.3110530676552808e-05, "loss": 0.7896, "step": 2662 }, { "epoch": 0.18746920098556846, "grad_norm": 2.0192344188690186, "learning_rate": 1.3109751873043668e-05, "loss": 0.7555, "step": 2663 }, { "epoch": 0.18753959873284054, "grad_norm": 2.473616361618042, "learning_rate": 1.3108972751882991e-05, "loss": 0.6899, "step": 2664 }, { "epoch": 0.18760999648011265, "grad_norm": 2.1776304244995117, "learning_rate": 1.3108193313111292e-05, "loss": 0.7622, "step": 2665 }, { "epoch": 0.18768039422738472, "grad_norm": 2.513071060180664, "learning_rate": 1.310741355676909e-05, "loss": 0.5605, "step": 2666 }, { "epoch": 0.1877507919746568, "grad_norm": 1.8833409547805786, "learning_rate": 1.3106633482896925e-05, "loss": 0.7278, "step": 2667 }, { "epoch": 0.1878211897219289, "grad_norm": 1.8820186853408813, "learning_rate": 1.3105853091535358e-05, "loss": 0.7216, "step": 2668 }, { "epoch": 0.18789158746920098, "grad_norm": 1.8471460342407227, "learning_rate": 1.3105072382724955e-05, "loss": 0.7329, "step": 2669 }, { "epoch": 0.18796198521647306, "grad_norm": 1.9577990770339966, "learning_rate": 1.3104291356506311e-05, "loss": 0.7121, "step": 2670 }, { "epoch": 0.18803238296374516, "grad_norm": 2.065424680709839, "learning_rate": 1.310351001292003e-05, "loss": 0.7207, "step": 2671 }, { "epoch": 0.18810278071101724, "grad_norm": 2.22249436378479, "learning_rate": 1.310272835200674e-05, "loss": 0.7717, "step": 2672 }, { "epoch": 0.18817317845828935, "grad_norm": 2.4484732151031494, "learning_rate": 1.3101946373807071e-05, "loss": 0.7987, "step": 2673 }, { "epoch": 0.18824357620556142, "grad_norm": 1.94876229763031, "learning_rate": 1.3101164078361687e-05, "loss": 0.7456, "step": 2674 }, { "epoch": 0.1883139739528335, "grad_norm": 2.24408221244812, "learning_rate": 1.3100381465711256e-05, "loss": 0.7804, "step": 2675 }, { "epoch": 0.1883843717001056, "grad_norm": 2.2092385292053223, "learning_rate": 1.3099598535896467e-05, "loss": 0.71, "step": 2676 }, { "epoch": 0.18845476944737768, "grad_norm": 2.3538308143615723, "learning_rate": 1.3098815288958028e-05, "loss": 0.8294, "step": 2677 }, { "epoch": 0.18852516719464976, "grad_norm": 2.5669524669647217, "learning_rate": 1.3098031724936657e-05, "loss": 0.7396, "step": 2678 }, { "epoch": 0.18859556494192187, "grad_norm": 2.335832357406616, "learning_rate": 1.3097247843873093e-05, "loss": 0.765, "step": 2679 }, { "epoch": 0.18866596268919394, "grad_norm": 2.497159481048584, "learning_rate": 1.3096463645808093e-05, "loss": 0.7495, "step": 2680 }, { "epoch": 0.18873636043646602, "grad_norm": 2.2877869606018066, "learning_rate": 1.3095679130782427e-05, "loss": 0.6619, "step": 2681 }, { "epoch": 0.18880675818373813, "grad_norm": 1.9491753578186035, "learning_rate": 1.309489429883688e-05, "loss": 0.6713, "step": 2682 }, { "epoch": 0.1888771559310102, "grad_norm": 2.1020352840423584, "learning_rate": 1.3094109150012263e-05, "loss": 0.7333, "step": 2683 }, { "epoch": 0.18894755367828228, "grad_norm": 2.605492353439331, "learning_rate": 1.309332368434939e-05, "loss": 0.7404, "step": 2684 }, { "epoch": 0.1890179514255544, "grad_norm": 2.2921059131622314, "learning_rate": 1.30925379018891e-05, "loss": 0.7879, "step": 2685 }, { "epoch": 0.18908834917282646, "grad_norm": 2.1571006774902344, "learning_rate": 1.3091751802672246e-05, "loss": 0.7287, "step": 2686 }, { "epoch": 0.18915874692009857, "grad_norm": 2.3039205074310303, "learning_rate": 1.30909653867397e-05, "loss": 0.7421, "step": 2687 }, { "epoch": 0.18922914466737065, "grad_norm": 2.1782045364379883, "learning_rate": 1.3090178654132346e-05, "loss": 0.7328, "step": 2688 }, { "epoch": 0.18929954241464272, "grad_norm": 1.9497147798538208, "learning_rate": 1.3089391604891089e-05, "loss": 0.6523, "step": 2689 }, { "epoch": 0.18936994016191483, "grad_norm": 2.2910635471343994, "learning_rate": 1.3088604239056848e-05, "loss": 0.6109, "step": 2690 }, { "epoch": 0.1894403379091869, "grad_norm": 2.323469638824463, "learning_rate": 1.3087816556670557e-05, "loss": 0.8345, "step": 2691 }, { "epoch": 0.18951073565645898, "grad_norm": 2.1230971813201904, "learning_rate": 1.3087028557773171e-05, "loss": 0.7779, "step": 2692 }, { "epoch": 0.1895811334037311, "grad_norm": 2.0256264209747314, "learning_rate": 1.3086240242405659e-05, "loss": 0.6761, "step": 2693 }, { "epoch": 0.18965153115100317, "grad_norm": 2.2368178367614746, "learning_rate": 1.3085451610609002e-05, "loss": 0.8045, "step": 2694 }, { "epoch": 0.18972192889827524, "grad_norm": 1.9269249439239502, "learning_rate": 1.3084662662424205e-05, "loss": 0.707, "step": 2695 }, { "epoch": 0.18979232664554735, "grad_norm": 2.1506247520446777, "learning_rate": 1.3083873397892287e-05, "loss": 0.7315, "step": 2696 }, { "epoch": 0.18986272439281943, "grad_norm": 2.223329782485962, "learning_rate": 1.308308381705428e-05, "loss": 0.8151, "step": 2697 }, { "epoch": 0.1899331221400915, "grad_norm": 1.9812963008880615, "learning_rate": 1.3082293919951235e-05, "loss": 0.6929, "step": 2698 }, { "epoch": 0.1900035198873636, "grad_norm": 1.9212409257888794, "learning_rate": 1.3081503706624224e-05, "loss": 0.8176, "step": 2699 }, { "epoch": 0.1900739176346357, "grad_norm": 2.3694651126861572, "learning_rate": 1.3080713177114324e-05, "loss": 0.7596, "step": 2700 }, { "epoch": 0.1901443153819078, "grad_norm": 2.101994037628174, "learning_rate": 1.307992233146264e-05, "loss": 0.7429, "step": 2701 }, { "epoch": 0.19021471312917987, "grad_norm": 1.9864675998687744, "learning_rate": 1.307913116971029e-05, "loss": 0.7076, "step": 2702 }, { "epoch": 0.19028511087645195, "grad_norm": 2.2184348106384277, "learning_rate": 1.3078339691898402e-05, "loss": 0.7361, "step": 2703 }, { "epoch": 0.19035550862372405, "grad_norm": 1.8963897228240967, "learning_rate": 1.307754789806813e-05, "loss": 0.6531, "step": 2704 }, { "epoch": 0.19042590637099613, "grad_norm": 1.8219187259674072, "learning_rate": 1.3076755788260638e-05, "loss": 0.6639, "step": 2705 }, { "epoch": 0.1904963041182682, "grad_norm": 1.9745874404907227, "learning_rate": 1.3075963362517108e-05, "loss": 0.7756, "step": 2706 }, { "epoch": 0.1905667018655403, "grad_norm": 2.273331642150879, "learning_rate": 1.3075170620878743e-05, "loss": 0.7995, "step": 2707 }, { "epoch": 0.1906370996128124, "grad_norm": 2.562375783920288, "learning_rate": 1.3074377563386755e-05, "loss": 0.6957, "step": 2708 }, { "epoch": 0.19070749736008447, "grad_norm": 2.0580031871795654, "learning_rate": 1.3073584190082375e-05, "loss": 0.6794, "step": 2709 }, { "epoch": 0.19077789510735657, "grad_norm": 2.2233617305755615, "learning_rate": 1.307279050100685e-05, "loss": 0.7888, "step": 2710 }, { "epoch": 0.19084829285462865, "grad_norm": 1.9741060733795166, "learning_rate": 1.3071996496201452e-05, "loss": 0.7795, "step": 2711 }, { "epoch": 0.19091869060190073, "grad_norm": 2.3727169036865234, "learning_rate": 1.3071202175707454e-05, "loss": 0.7464, "step": 2712 }, { "epoch": 0.19098908834917283, "grad_norm": 1.7336817979812622, "learning_rate": 1.3070407539566157e-05, "loss": 0.7341, "step": 2713 }, { "epoch": 0.1910594860964449, "grad_norm": 1.9446141719818115, "learning_rate": 1.3069612587818874e-05, "loss": 0.7829, "step": 2714 }, { "epoch": 0.191129883843717, "grad_norm": 2.096207618713379, "learning_rate": 1.3068817320506938e-05, "loss": 0.6939, "step": 2715 }, { "epoch": 0.1912002815909891, "grad_norm": 2.1018364429473877, "learning_rate": 1.306802173767169e-05, "loss": 0.7565, "step": 2716 }, { "epoch": 0.19127067933826117, "grad_norm": 1.9718762636184692, "learning_rate": 1.3067225839354496e-05, "loss": 0.7871, "step": 2717 }, { "epoch": 0.19134107708553327, "grad_norm": 2.172664165496826, "learning_rate": 1.3066429625596737e-05, "loss": 0.6918, "step": 2718 }, { "epoch": 0.19141147483280535, "grad_norm": 1.9310816526412964, "learning_rate": 1.306563309643981e-05, "loss": 0.7932, "step": 2719 }, { "epoch": 0.19148187258007743, "grad_norm": 1.8669805526733398, "learning_rate": 1.3064836251925121e-05, "loss": 0.77, "step": 2720 }, { "epoch": 0.19155227032734953, "grad_norm": 2.0125415325164795, "learning_rate": 1.3064039092094105e-05, "loss": 0.7287, "step": 2721 }, { "epoch": 0.1916226680746216, "grad_norm": 2.396197557449341, "learning_rate": 1.3063241616988205e-05, "loss": 0.802, "step": 2722 }, { "epoch": 0.1916930658218937, "grad_norm": 2.1417319774627686, "learning_rate": 1.306244382664888e-05, "loss": 0.6947, "step": 2723 }, { "epoch": 0.1917634635691658, "grad_norm": 2.1334128379821777, "learning_rate": 1.3061645721117614e-05, "loss": 0.7823, "step": 2724 }, { "epoch": 0.19183386131643787, "grad_norm": 2.569943428039551, "learning_rate": 1.3060847300435894e-05, "loss": 0.716, "step": 2725 }, { "epoch": 0.19190425906370995, "grad_norm": 3.4548208713531494, "learning_rate": 1.3060048564645236e-05, "loss": 0.6985, "step": 2726 }, { "epoch": 0.19197465681098205, "grad_norm": 1.8567713499069214, "learning_rate": 1.3059249513787161e-05, "loss": 0.6867, "step": 2727 }, { "epoch": 0.19204505455825413, "grad_norm": 2.592452049255371, "learning_rate": 1.3058450147903217e-05, "loss": 0.6469, "step": 2728 }, { "epoch": 0.19211545230552624, "grad_norm": 2.5163986682891846, "learning_rate": 1.3057650467034967e-05, "loss": 0.6935, "step": 2729 }, { "epoch": 0.1921858500527983, "grad_norm": 2.787259340286255, "learning_rate": 1.305685047122398e-05, "loss": 0.7104, "step": 2730 }, { "epoch": 0.1922562478000704, "grad_norm": 1.847646951675415, "learning_rate": 1.3056050160511853e-05, "loss": 0.6959, "step": 2731 }, { "epoch": 0.1923266455473425, "grad_norm": 3.6204771995544434, "learning_rate": 1.305524953494019e-05, "loss": 0.7119, "step": 2732 }, { "epoch": 0.19239704329461457, "grad_norm": 2.364389181137085, "learning_rate": 1.3054448594550623e-05, "loss": 0.7977, "step": 2733 }, { "epoch": 0.19246744104188665, "grad_norm": 1.944573163986206, "learning_rate": 1.305364733938479e-05, "loss": 0.7006, "step": 2734 }, { "epoch": 0.19253783878915876, "grad_norm": 2.1404969692230225, "learning_rate": 1.3052845769484348e-05, "loss": 0.8093, "step": 2735 }, { "epoch": 0.19260823653643083, "grad_norm": 2.393246650695801, "learning_rate": 1.3052043884890974e-05, "loss": 0.8191, "step": 2736 }, { "epoch": 0.1926786342837029, "grad_norm": 2.018052816390991, "learning_rate": 1.3051241685646359e-05, "loss": 0.6981, "step": 2737 }, { "epoch": 0.19274903203097501, "grad_norm": 2.018465757369995, "learning_rate": 1.3050439171792205e-05, "loss": 0.7866, "step": 2738 }, { "epoch": 0.1928194297782471, "grad_norm": 2.519308567047119, "learning_rate": 1.304963634337024e-05, "loss": 0.7022, "step": 2739 }, { "epoch": 0.19288982752551917, "grad_norm": 2.117354154586792, "learning_rate": 1.3048833200422203e-05, "loss": 0.7209, "step": 2740 }, { "epoch": 0.19296022527279127, "grad_norm": 1.8676178455352783, "learning_rate": 1.304802974298985e-05, "loss": 0.7249, "step": 2741 }, { "epoch": 0.19303062302006335, "grad_norm": 1.9778831005096436, "learning_rate": 1.3047225971114952e-05, "loss": 0.6649, "step": 2742 }, { "epoch": 0.19310102076733546, "grad_norm": 2.2580888271331787, "learning_rate": 1.30464218848393e-05, "loss": 0.7246, "step": 2743 }, { "epoch": 0.19317141851460753, "grad_norm": 2.3609445095062256, "learning_rate": 1.30456174842047e-05, "loss": 0.7443, "step": 2744 }, { "epoch": 0.1932418162618796, "grad_norm": 2.212705373764038, "learning_rate": 1.304481276925297e-05, "loss": 0.7427, "step": 2745 }, { "epoch": 0.19331221400915172, "grad_norm": 2.079806089401245, "learning_rate": 1.3044007740025949e-05, "loss": 0.7011, "step": 2746 }, { "epoch": 0.1933826117564238, "grad_norm": 1.9634250402450562, "learning_rate": 1.3043202396565492e-05, "loss": 0.788, "step": 2747 }, { "epoch": 0.19345300950369587, "grad_norm": 2.3731157779693604, "learning_rate": 1.3042396738913469e-05, "loss": 0.6756, "step": 2748 }, { "epoch": 0.19352340725096798, "grad_norm": 2.054558038711548, "learning_rate": 1.304159076711177e-05, "loss": 0.8084, "step": 2749 }, { "epoch": 0.19359380499824005, "grad_norm": 2.0574147701263428, "learning_rate": 1.3040784481202292e-05, "loss": 0.7848, "step": 2750 }, { "epoch": 0.19366420274551213, "grad_norm": 2.184295892715454, "learning_rate": 1.303997788122696e-05, "loss": 0.8919, "step": 2751 }, { "epoch": 0.19373460049278424, "grad_norm": 2.161620855331421, "learning_rate": 1.3039170967227707e-05, "loss": 0.7383, "step": 2752 }, { "epoch": 0.19380499824005631, "grad_norm": 2.158189535140991, "learning_rate": 1.3038363739246484e-05, "loss": 0.8294, "step": 2753 }, { "epoch": 0.1938753959873284, "grad_norm": 2.151343822479248, "learning_rate": 1.3037556197325266e-05, "loss": 0.7484, "step": 2754 }, { "epoch": 0.1939457937346005, "grad_norm": 2.10341477394104, "learning_rate": 1.303674834150603e-05, "loss": 0.8381, "step": 2755 }, { "epoch": 0.19401619148187257, "grad_norm": 2.0958869457244873, "learning_rate": 1.3035940171830784e-05, "loss": 0.756, "step": 2756 }, { "epoch": 0.19408658922914468, "grad_norm": 1.867287516593933, "learning_rate": 1.3035131688341538e-05, "loss": 0.5915, "step": 2757 }, { "epoch": 0.19415698697641676, "grad_norm": 1.762721061706543, "learning_rate": 1.3034322891080333e-05, "loss": 0.7172, "step": 2758 }, { "epoch": 0.19422738472368883, "grad_norm": 2.3382787704467773, "learning_rate": 1.3033513780089213e-05, "loss": 0.7807, "step": 2759 }, { "epoch": 0.19429778247096094, "grad_norm": 2.003969430923462, "learning_rate": 1.303270435541025e-05, "loss": 0.7743, "step": 2760 }, { "epoch": 0.19436818021823302, "grad_norm": 2.2048773765563965, "learning_rate": 1.3031894617085521e-05, "loss": 0.7692, "step": 2761 }, { "epoch": 0.1944385779655051, "grad_norm": 2.39367413520813, "learning_rate": 1.3031084565157129e-05, "loss": 0.7393, "step": 2762 }, { "epoch": 0.1945089757127772, "grad_norm": 1.9684512615203857, "learning_rate": 1.3030274199667189e-05, "loss": 0.6916, "step": 2763 }, { "epoch": 0.19457937346004928, "grad_norm": 2.8447518348693848, "learning_rate": 1.3029463520657832e-05, "loss": 0.8532, "step": 2764 }, { "epoch": 0.19464977120732135, "grad_norm": 2.209026336669922, "learning_rate": 1.3028652528171207e-05, "loss": 0.9653, "step": 2765 }, { "epoch": 0.19472016895459346, "grad_norm": 2.4242308139801025, "learning_rate": 1.3027841222249476e-05, "loss": 0.8526, "step": 2766 }, { "epoch": 0.19479056670186554, "grad_norm": 2.165584087371826, "learning_rate": 1.3027029602934823e-05, "loss": 0.7654, "step": 2767 }, { "epoch": 0.19486096444913764, "grad_norm": 2.6486003398895264, "learning_rate": 1.3026217670269438e-05, "loss": 0.7788, "step": 2768 }, { "epoch": 0.19493136219640972, "grad_norm": 2.2641489505767822, "learning_rate": 1.3025405424295543e-05, "loss": 0.7694, "step": 2769 }, { "epoch": 0.1950017599436818, "grad_norm": 1.7094526290893555, "learning_rate": 1.3024592865055361e-05, "loss": 0.7795, "step": 2770 }, { "epoch": 0.1950721576909539, "grad_norm": 2.1737630367279053, "learning_rate": 1.302377999259114e-05, "loss": 0.6648, "step": 2771 }, { "epoch": 0.19514255543822598, "grad_norm": 2.237395763397217, "learning_rate": 1.302296680694514e-05, "loss": 0.7551, "step": 2772 }, { "epoch": 0.19521295318549806, "grad_norm": 2.3748974800109863, "learning_rate": 1.3022153308159645e-05, "loss": 0.6245, "step": 2773 }, { "epoch": 0.19528335093277016, "grad_norm": 2.247767448425293, "learning_rate": 1.3021339496276943e-05, "loss": 0.8586, "step": 2774 }, { "epoch": 0.19535374868004224, "grad_norm": 3.1952812671661377, "learning_rate": 1.3020525371339348e-05, "loss": 0.8344, "step": 2775 }, { "epoch": 0.19542414642731432, "grad_norm": 2.8445017337799072, "learning_rate": 1.3019710933389185e-05, "loss": 0.6956, "step": 2776 }, { "epoch": 0.19549454417458642, "grad_norm": 2.2520487308502197, "learning_rate": 1.30188961824688e-05, "loss": 0.7413, "step": 2777 }, { "epoch": 0.1955649419218585, "grad_norm": 2.014202356338501, "learning_rate": 1.3018081118620552e-05, "loss": 0.8411, "step": 2778 }, { "epoch": 0.19563533966913058, "grad_norm": 2.12870717048645, "learning_rate": 1.3017265741886815e-05, "loss": 0.6461, "step": 2779 }, { "epoch": 0.19570573741640268, "grad_norm": 2.101797342300415, "learning_rate": 1.3016450052309983e-05, "loss": 0.8545, "step": 2780 }, { "epoch": 0.19577613516367476, "grad_norm": 2.379990816116333, "learning_rate": 1.3015634049932464e-05, "loss": 0.7968, "step": 2781 }, { "epoch": 0.19584653291094686, "grad_norm": 2.038531541824341, "learning_rate": 1.3014817734796683e-05, "loss": 0.7992, "step": 2782 }, { "epoch": 0.19591693065821894, "grad_norm": 2.1800918579101562, "learning_rate": 1.3014001106945081e-05, "loss": 0.8469, "step": 2783 }, { "epoch": 0.19598732840549102, "grad_norm": 2.6332337856292725, "learning_rate": 1.3013184166420115e-05, "loss": 0.753, "step": 2784 }, { "epoch": 0.19605772615276312, "grad_norm": 1.955428123474121, "learning_rate": 1.3012366913264256e-05, "loss": 0.7138, "step": 2785 }, { "epoch": 0.1961281239000352, "grad_norm": 4.231899261474609, "learning_rate": 1.3011549347519996e-05, "loss": 0.7401, "step": 2786 }, { "epoch": 0.19619852164730728, "grad_norm": 2.3288609981536865, "learning_rate": 1.3010731469229844e-05, "loss": 0.6908, "step": 2787 }, { "epoch": 0.19626891939457938, "grad_norm": 2.1061110496520996, "learning_rate": 1.3009913278436318e-05, "loss": 0.8138, "step": 2788 }, { "epoch": 0.19633931714185146, "grad_norm": 2.6487224102020264, "learning_rate": 1.3009094775181959e-05, "loss": 0.8332, "step": 2789 }, { "epoch": 0.19640971488912354, "grad_norm": 2.483846426010132, "learning_rate": 1.3008275959509318e-05, "loss": 0.7152, "step": 2790 }, { "epoch": 0.19648011263639564, "grad_norm": 2.150294780731201, "learning_rate": 1.300745683146097e-05, "loss": 0.6975, "step": 2791 }, { "epoch": 0.19655051038366772, "grad_norm": 2.5024619102478027, "learning_rate": 1.3006637391079499e-05, "loss": 0.7803, "step": 2792 }, { "epoch": 0.1966209081309398, "grad_norm": 1.8588032722473145, "learning_rate": 1.3005817638407513e-05, "loss": 0.6949, "step": 2793 }, { "epoch": 0.1966913058782119, "grad_norm": 2.7311601638793945, "learning_rate": 1.3004997573487626e-05, "loss": 0.8369, "step": 2794 }, { "epoch": 0.19676170362548398, "grad_norm": 1.9006553888320923, "learning_rate": 1.3004177196362478e-05, "loss": 0.715, "step": 2795 }, { "epoch": 0.19683210137275609, "grad_norm": 2.1600940227508545, "learning_rate": 1.300335650707472e-05, "loss": 0.7188, "step": 2796 }, { "epoch": 0.19690249912002816, "grad_norm": 2.0004117488861084, "learning_rate": 1.3002535505667021e-05, "loss": 0.7717, "step": 2797 }, { "epoch": 0.19697289686730024, "grad_norm": 2.239529609680176, "learning_rate": 1.3001714192182061e-05, "loss": 0.7163, "step": 2798 }, { "epoch": 0.19704329461457235, "grad_norm": 2.7071211338043213, "learning_rate": 1.300089256666255e-05, "loss": 0.7636, "step": 2799 }, { "epoch": 0.19711369236184442, "grad_norm": 1.9972362518310547, "learning_rate": 1.3000070629151197e-05, "loss": 0.8311, "step": 2800 }, { "epoch": 0.1971840901091165, "grad_norm": 2.205315113067627, "learning_rate": 1.2999248379690739e-05, "loss": 0.7539, "step": 2801 }, { "epoch": 0.1972544878563886, "grad_norm": 2.04468035697937, "learning_rate": 1.2998425818323923e-05, "loss": 0.7305, "step": 2802 }, { "epoch": 0.19732488560366068, "grad_norm": 1.7602639198303223, "learning_rate": 1.2997602945093516e-05, "loss": 0.72, "step": 2803 }, { "epoch": 0.19739528335093276, "grad_norm": 3.369656801223755, "learning_rate": 1.2996779760042301e-05, "loss": 0.6883, "step": 2804 }, { "epoch": 0.19746568109820486, "grad_norm": 2.1299684047698975, "learning_rate": 1.2995956263213076e-05, "loss": 0.8067, "step": 2805 }, { "epoch": 0.19753607884547694, "grad_norm": 2.0659384727478027, "learning_rate": 1.2995132454648654e-05, "loss": 0.7701, "step": 2806 }, { "epoch": 0.19760647659274902, "grad_norm": 2.0151760578155518, "learning_rate": 1.2994308334391866e-05, "loss": 0.6707, "step": 2807 }, { "epoch": 0.19767687434002112, "grad_norm": 1.9165823459625244, "learning_rate": 1.299348390248556e-05, "loss": 0.709, "step": 2808 }, { "epoch": 0.1977472720872932, "grad_norm": 2.2062740325927734, "learning_rate": 1.2992659158972595e-05, "loss": 0.6871, "step": 2809 }, { "epoch": 0.1978176698345653, "grad_norm": 2.1248085498809814, "learning_rate": 1.2991834103895856e-05, "loss": 0.8365, "step": 2810 }, { "epoch": 0.19788806758183738, "grad_norm": 2.281906843185425, "learning_rate": 1.2991008737298235e-05, "loss": 0.7485, "step": 2811 }, { "epoch": 0.19795846532910946, "grad_norm": 2.1689603328704834, "learning_rate": 1.2990183059222643e-05, "loss": 0.6495, "step": 2812 }, { "epoch": 0.19802886307638157, "grad_norm": 3.091979742050171, "learning_rate": 1.298935706971201e-05, "loss": 0.6978, "step": 2813 }, { "epoch": 0.19809926082365364, "grad_norm": 1.8291183710098267, "learning_rate": 1.2988530768809278e-05, "loss": 0.8347, "step": 2814 }, { "epoch": 0.19816965857092572, "grad_norm": 2.44168758392334, "learning_rate": 1.2987704156557407e-05, "loss": 0.8155, "step": 2815 }, { "epoch": 0.19824005631819783, "grad_norm": 1.8233612775802612, "learning_rate": 1.2986877232999373e-05, "loss": 0.7355, "step": 2816 }, { "epoch": 0.1983104540654699, "grad_norm": 1.9227584600448608, "learning_rate": 1.2986049998178172e-05, "loss": 0.6583, "step": 2817 }, { "epoch": 0.19838085181274198, "grad_norm": 1.9398671388626099, "learning_rate": 1.298522245213681e-05, "loss": 0.6506, "step": 2818 }, { "epoch": 0.1984512495600141, "grad_norm": 2.112593173980713, "learning_rate": 1.2984394594918313e-05, "loss": 0.708, "step": 2819 }, { "epoch": 0.19852164730728616, "grad_norm": 1.9524953365325928, "learning_rate": 1.2983566426565719e-05, "loss": 0.8291, "step": 2820 }, { "epoch": 0.19859204505455824, "grad_norm": 2.3048479557037354, "learning_rate": 1.2982737947122089e-05, "loss": 0.7372, "step": 2821 }, { "epoch": 0.19866244280183035, "grad_norm": 1.8637635707855225, "learning_rate": 1.2981909156630493e-05, "loss": 0.717, "step": 2822 }, { "epoch": 0.19873284054910242, "grad_norm": 2.1397547721862793, "learning_rate": 1.2981080055134025e-05, "loss": 0.7968, "step": 2823 }, { "epoch": 0.19880323829637453, "grad_norm": 1.937309980392456, "learning_rate": 1.2980250642675786e-05, "loss": 0.6804, "step": 2824 }, { "epoch": 0.1988736360436466, "grad_norm": 2.0659267902374268, "learning_rate": 1.2979420919298901e-05, "loss": 0.6891, "step": 2825 }, { "epoch": 0.19894403379091868, "grad_norm": 2.181600570678711, "learning_rate": 1.2978590885046508e-05, "loss": 0.7028, "step": 2826 }, { "epoch": 0.1990144315381908, "grad_norm": 2.405527353286743, "learning_rate": 1.2977760539961759e-05, "loss": 0.8053, "step": 2827 }, { "epoch": 0.19908482928546287, "grad_norm": 2.1469526290893555, "learning_rate": 1.2976929884087825e-05, "loss": 0.6974, "step": 2828 }, { "epoch": 0.19915522703273494, "grad_norm": 2.1089656352996826, "learning_rate": 1.2976098917467895e-05, "loss": 0.7781, "step": 2829 }, { "epoch": 0.19922562478000705, "grad_norm": 2.259326696395874, "learning_rate": 1.297526764014517e-05, "loss": 0.7774, "step": 2830 }, { "epoch": 0.19929602252727913, "grad_norm": 1.9495466947555542, "learning_rate": 1.2974436052162866e-05, "loss": 0.838, "step": 2831 }, { "epoch": 0.1993664202745512, "grad_norm": 2.1152100563049316, "learning_rate": 1.2973604153564225e-05, "loss": 0.756, "step": 2832 }, { "epoch": 0.1994368180218233, "grad_norm": 2.0844058990478516, "learning_rate": 1.2972771944392491e-05, "loss": 0.8028, "step": 2833 }, { "epoch": 0.1995072157690954, "grad_norm": 2.151262044906616, "learning_rate": 1.2971939424690936e-05, "loss": 0.6658, "step": 2834 }, { "epoch": 0.19957761351636746, "grad_norm": 2.0456371307373047, "learning_rate": 1.2971106594502842e-05, "loss": 0.7, "step": 2835 }, { "epoch": 0.19964801126363957, "grad_norm": 1.9289238452911377, "learning_rate": 1.2970273453871508e-05, "loss": 0.8087, "step": 2836 }, { "epoch": 0.19971840901091165, "grad_norm": 2.058263063430786, "learning_rate": 1.296944000284025e-05, "loss": 0.7162, "step": 2837 }, { "epoch": 0.19978880675818375, "grad_norm": 2.0099096298217773, "learning_rate": 1.29686062414524e-05, "loss": 0.6161, "step": 2838 }, { "epoch": 0.19985920450545583, "grad_norm": 1.8196367025375366, "learning_rate": 1.2967772169751306e-05, "loss": 0.7605, "step": 2839 }, { "epoch": 0.1999296022527279, "grad_norm": 2.8683559894561768, "learning_rate": 1.2966937787780332e-05, "loss": 0.8063, "step": 2840 }, { "epoch": 0.2, "grad_norm": 2.2348105907440186, "learning_rate": 1.2966103095582858e-05, "loss": 0.6561, "step": 2841 }, { "epoch": 0.2000703977472721, "grad_norm": 1.9138249158859253, "learning_rate": 1.2965268093202281e-05, "loss": 0.7738, "step": 2842 }, { "epoch": 0.20014079549454417, "grad_norm": 2.21639084815979, "learning_rate": 1.2964432780682014e-05, "loss": 0.6888, "step": 2843 }, { "epoch": 0.20021119324181627, "grad_norm": 1.9455065727233887, "learning_rate": 1.2963597158065485e-05, "loss": 0.6479, "step": 2844 }, { "epoch": 0.20028159098908835, "grad_norm": 2.2107162475585938, "learning_rate": 1.2962761225396139e-05, "loss": 0.872, "step": 2845 }, { "epoch": 0.20035198873636043, "grad_norm": 2.121769666671753, "learning_rate": 1.2961924982717437e-05, "loss": 0.7043, "step": 2846 }, { "epoch": 0.20042238648363253, "grad_norm": 2.1713573932647705, "learning_rate": 1.2961088430072853e-05, "loss": 0.7202, "step": 2847 }, { "epoch": 0.2004927842309046, "grad_norm": 2.034632682800293, "learning_rate": 1.2960251567505886e-05, "loss": 0.6721, "step": 2848 }, { "epoch": 0.20056318197817669, "grad_norm": 1.7628371715545654, "learning_rate": 1.2959414395060041e-05, "loss": 0.794, "step": 2849 }, { "epoch": 0.2006335797254488, "grad_norm": 1.5182468891143799, "learning_rate": 1.2958576912778844e-05, "loss": 0.8976, "step": 2850 }, { "epoch": 0.20070397747272087, "grad_norm": 2.069378614425659, "learning_rate": 1.2957739120705837e-05, "loss": 0.7208, "step": 2851 }, { "epoch": 0.20077437521999297, "grad_norm": 2.0398383140563965, "learning_rate": 1.2956901018884578e-05, "loss": 0.8075, "step": 2852 }, { "epoch": 0.20084477296726505, "grad_norm": 2.3953700065612793, "learning_rate": 1.295606260735864e-05, "loss": 0.837, "step": 2853 }, { "epoch": 0.20091517071453713, "grad_norm": 1.8547263145446777, "learning_rate": 1.2955223886171611e-05, "loss": 0.6763, "step": 2854 }, { "epoch": 0.20098556846180923, "grad_norm": 2.385629415512085, "learning_rate": 1.2954384855367101e-05, "loss": 0.8897, "step": 2855 }, { "epoch": 0.2010559662090813, "grad_norm": 3.1562159061431885, "learning_rate": 1.295354551498873e-05, "loss": 0.7942, "step": 2856 }, { "epoch": 0.2011263639563534, "grad_norm": 1.9930483102798462, "learning_rate": 1.2952705865080135e-05, "loss": 0.671, "step": 2857 }, { "epoch": 0.2011967617036255, "grad_norm": 1.9684098958969116, "learning_rate": 1.2951865905684971e-05, "loss": 0.8718, "step": 2858 }, { "epoch": 0.20126715945089757, "grad_norm": 2.0362963676452637, "learning_rate": 1.2951025636846909e-05, "loss": 0.7826, "step": 2859 }, { "epoch": 0.20133755719816965, "grad_norm": 1.8419691324234009, "learning_rate": 1.2950185058609634e-05, "loss": 0.7464, "step": 2860 }, { "epoch": 0.20140795494544175, "grad_norm": 3.8464763164520264, "learning_rate": 1.2949344171016848e-05, "loss": 0.7962, "step": 2861 }, { "epoch": 0.20147835269271383, "grad_norm": 2.2273335456848145, "learning_rate": 1.2948502974112272e-05, "loss": 0.7655, "step": 2862 }, { "epoch": 0.2015487504399859, "grad_norm": 2.1485912799835205, "learning_rate": 1.294766146793964e-05, "loss": 0.7673, "step": 2863 }, { "epoch": 0.201619148187258, "grad_norm": 2.328144073486328, "learning_rate": 1.29468196525427e-05, "loss": 0.769, "step": 2864 }, { "epoch": 0.2016895459345301, "grad_norm": 1.7949751615524292, "learning_rate": 1.294597752796522e-05, "loss": 0.7731, "step": 2865 }, { "epoch": 0.2017599436818022, "grad_norm": 2.0370733737945557, "learning_rate": 1.2945135094250984e-05, "loss": 0.7306, "step": 2866 }, { "epoch": 0.20183034142907427, "grad_norm": 3.0403120517730713, "learning_rate": 1.294429235144379e-05, "loss": 0.8582, "step": 2867 }, { "epoch": 0.20190073917634635, "grad_norm": 1.4900261163711548, "learning_rate": 1.2943449299587454e-05, "loss": 0.8521, "step": 2868 }, { "epoch": 0.20197113692361846, "grad_norm": 2.114074945449829, "learning_rate": 1.2942605938725806e-05, "loss": 0.8275, "step": 2869 }, { "epoch": 0.20204153467089053, "grad_norm": 2.214066743850708, "learning_rate": 1.2941762268902694e-05, "loss": 0.7005, "step": 2870 }, { "epoch": 0.2021119324181626, "grad_norm": 1.9896901845932007, "learning_rate": 1.2940918290161977e-05, "loss": 0.7359, "step": 2871 }, { "epoch": 0.20218233016543471, "grad_norm": 2.5028276443481445, "learning_rate": 1.2940074002547542e-05, "loss": 0.722, "step": 2872 }, { "epoch": 0.2022527279127068, "grad_norm": 1.8832263946533203, "learning_rate": 1.2939229406103278e-05, "loss": 0.7537, "step": 2873 }, { "epoch": 0.20232312565997887, "grad_norm": 2.0421197414398193, "learning_rate": 1.2938384500873098e-05, "loss": 0.7412, "step": 2874 }, { "epoch": 0.20239352340725097, "grad_norm": 1.8767123222351074, "learning_rate": 1.293753928690093e-05, "loss": 0.6927, "step": 2875 }, { "epoch": 0.20246392115452305, "grad_norm": 5.04095458984375, "learning_rate": 1.2936693764230717e-05, "loss": 0.6829, "step": 2876 }, { "epoch": 0.20253431890179513, "grad_norm": 2.0795814990997314, "learning_rate": 1.2935847932906419e-05, "loss": 0.7961, "step": 2877 }, { "epoch": 0.20260471664906723, "grad_norm": 2.2549116611480713, "learning_rate": 1.2935001792972011e-05, "loss": 0.6345, "step": 2878 }, { "epoch": 0.2026751143963393, "grad_norm": 1.8898028135299683, "learning_rate": 1.2934155344471485e-05, "loss": 0.7062, "step": 2879 }, { "epoch": 0.20274551214361142, "grad_norm": 2.3344531059265137, "learning_rate": 1.2933308587448847e-05, "loss": 0.7624, "step": 2880 }, { "epoch": 0.2028159098908835, "grad_norm": 1.9059360027313232, "learning_rate": 1.2932461521948125e-05, "loss": 0.6858, "step": 2881 }, { "epoch": 0.20288630763815557, "grad_norm": 2.1258914470672607, "learning_rate": 1.2931614148013353e-05, "loss": 0.7384, "step": 2882 }, { "epoch": 0.20295670538542768, "grad_norm": 2.2573955059051514, "learning_rate": 1.2930766465688591e-05, "loss": 0.7864, "step": 2883 }, { "epoch": 0.20302710313269975, "grad_norm": 1.8230419158935547, "learning_rate": 1.2929918475017909e-05, "loss": 0.7338, "step": 2884 }, { "epoch": 0.20309750087997183, "grad_norm": 2.21211838722229, "learning_rate": 1.2929070176045392e-05, "loss": 0.8074, "step": 2885 }, { "epoch": 0.20316789862724394, "grad_norm": 2.009521007537842, "learning_rate": 1.292822156881515e-05, "loss": 0.7239, "step": 2886 }, { "epoch": 0.20323829637451601, "grad_norm": 5.319012641906738, "learning_rate": 1.29273726533713e-05, "loss": 0.9075, "step": 2887 }, { "epoch": 0.2033086941217881, "grad_norm": 1.7754051685333252, "learning_rate": 1.2926523429757975e-05, "loss": 0.851, "step": 2888 }, { "epoch": 0.2033790918690602, "grad_norm": 2.138119697570801, "learning_rate": 1.2925673898019333e-05, "loss": 0.7645, "step": 2889 }, { "epoch": 0.20344948961633227, "grad_norm": 2.1004340648651123, "learning_rate": 1.2924824058199537e-05, "loss": 0.7091, "step": 2890 }, { "epoch": 0.20351988736360435, "grad_norm": 2.3534152507781982, "learning_rate": 1.2923973910342771e-05, "loss": 0.7933, "step": 2891 }, { "epoch": 0.20359028511087646, "grad_norm": 1.8388102054595947, "learning_rate": 1.2923123454493237e-05, "loss": 0.7564, "step": 2892 }, { "epoch": 0.20366068285814853, "grad_norm": 1.6807938814163208, "learning_rate": 1.2922272690695151e-05, "loss": 0.6988, "step": 2893 }, { "epoch": 0.20373108060542064, "grad_norm": 2.025575876235962, "learning_rate": 1.2921421618992744e-05, "loss": 0.8812, "step": 2894 }, { "epoch": 0.20380147835269272, "grad_norm": 2.384500503540039, "learning_rate": 1.2920570239430264e-05, "loss": 0.8095, "step": 2895 }, { "epoch": 0.2038718760999648, "grad_norm": 1.9504368305206299, "learning_rate": 1.2919718552051975e-05, "loss": 0.7702, "step": 2896 }, { "epoch": 0.2039422738472369, "grad_norm": 2.3613922595977783, "learning_rate": 1.2918866556902158e-05, "loss": 0.7966, "step": 2897 }, { "epoch": 0.20401267159450898, "grad_norm": 2.1860833168029785, "learning_rate": 1.291801425402511e-05, "loss": 0.6658, "step": 2898 }, { "epoch": 0.20408306934178105, "grad_norm": 1.856161117553711, "learning_rate": 1.2917161643465138e-05, "loss": 0.8094, "step": 2899 }, { "epoch": 0.20415346708905316, "grad_norm": 2.78106427192688, "learning_rate": 1.2916308725266575e-05, "loss": 0.7148, "step": 2900 }, { "epoch": 0.20422386483632524, "grad_norm": 1.8995968103408813, "learning_rate": 1.2915455499473762e-05, "loss": 0.8199, "step": 2901 }, { "epoch": 0.2042942625835973, "grad_norm": 2.266082525253296, "learning_rate": 1.2914601966131061e-05, "loss": 0.8534, "step": 2902 }, { "epoch": 0.20436466033086942, "grad_norm": 2.6306707859039307, "learning_rate": 1.2913748125282849e-05, "loss": 0.8553, "step": 2903 }, { "epoch": 0.2044350580781415, "grad_norm": 1.8121219873428345, "learning_rate": 1.2912893976973514e-05, "loss": 0.6494, "step": 2904 }, { "epoch": 0.20450545582541357, "grad_norm": 3.189236879348755, "learning_rate": 1.2912039521247465e-05, "loss": 0.766, "step": 2905 }, { "epoch": 0.20457585357268568, "grad_norm": 2.2701783180236816, "learning_rate": 1.2911184758149127e-05, "loss": 0.6702, "step": 2906 }, { "epoch": 0.20464625131995776, "grad_norm": 2.1670026779174805, "learning_rate": 1.2910329687722943e-05, "loss": 0.7257, "step": 2907 }, { "epoch": 0.20471664906722986, "grad_norm": 1.777356505393982, "learning_rate": 1.2909474310013364e-05, "loss": 0.7617, "step": 2908 }, { "epoch": 0.20478704681450194, "grad_norm": 1.8031384944915771, "learning_rate": 1.2908618625064865e-05, "loss": 0.777, "step": 2909 }, { "epoch": 0.20485744456177402, "grad_norm": 2.091069221496582, "learning_rate": 1.2907762632921932e-05, "loss": 0.7017, "step": 2910 }, { "epoch": 0.20492784230904612, "grad_norm": 1.982006549835205, "learning_rate": 1.2906906333629068e-05, "loss": 0.7227, "step": 2911 }, { "epoch": 0.2049982400563182, "grad_norm": 2.096505880355835, "learning_rate": 1.2906049727230795e-05, "loss": 0.7961, "step": 2912 }, { "epoch": 0.20506863780359028, "grad_norm": 1.738042950630188, "learning_rate": 1.2905192813771648e-05, "loss": 0.7264, "step": 2913 }, { "epoch": 0.20513903555086238, "grad_norm": 2.8099374771118164, "learning_rate": 1.2904335593296176e-05, "loss": 0.8555, "step": 2914 }, { "epoch": 0.20520943329813446, "grad_norm": 2.2637176513671875, "learning_rate": 1.2903478065848955e-05, "loss": 0.8351, "step": 2915 }, { "epoch": 0.20527983104540654, "grad_norm": 2.0306296348571777, "learning_rate": 1.2902620231474559e-05, "loss": 0.7509, "step": 2916 }, { "epoch": 0.20535022879267864, "grad_norm": 2.6248860359191895, "learning_rate": 1.2901762090217591e-05, "loss": 0.8289, "step": 2917 }, { "epoch": 0.20542062653995072, "grad_norm": 2.2198662757873535, "learning_rate": 1.2900903642122667e-05, "loss": 0.7655, "step": 2918 }, { "epoch": 0.2054910242872228, "grad_norm": 1.9832063913345337, "learning_rate": 1.290004488723442e-05, "loss": 0.6913, "step": 2919 }, { "epoch": 0.2055614220344949, "grad_norm": 2.3485302925109863, "learning_rate": 1.2899185825597493e-05, "loss": 0.8328, "step": 2920 }, { "epoch": 0.20563181978176698, "grad_norm": 2.24904203414917, "learning_rate": 1.2898326457256557e-05, "loss": 0.7077, "step": 2921 }, { "epoch": 0.20570221752903908, "grad_norm": 2.032475471496582, "learning_rate": 1.2897466782256282e-05, "loss": 0.8003, "step": 2922 }, { "epoch": 0.20577261527631116, "grad_norm": 2.842956304550171, "learning_rate": 1.2896606800641372e-05, "loss": 0.7039, "step": 2923 }, { "epoch": 0.20584301302358324, "grad_norm": 2.5498368740081787, "learning_rate": 1.2895746512456532e-05, "loss": 0.7233, "step": 2924 }, { "epoch": 0.20591341077085534, "grad_norm": 1.9273475408554077, "learning_rate": 1.2894885917746491e-05, "loss": 0.7536, "step": 2925 }, { "epoch": 0.20598380851812742, "grad_norm": 2.493891716003418, "learning_rate": 1.2894025016555995e-05, "loss": 0.7637, "step": 2926 }, { "epoch": 0.2060542062653995, "grad_norm": 2.4803435802459717, "learning_rate": 1.2893163808929798e-05, "loss": 0.6384, "step": 2927 }, { "epoch": 0.2061246040126716, "grad_norm": 2.988567352294922, "learning_rate": 1.2892302294912678e-05, "loss": 0.7345, "step": 2928 }, { "epoch": 0.20619500175994368, "grad_norm": 2.2329914569854736, "learning_rate": 1.2891440474549424e-05, "loss": 0.7217, "step": 2929 }, { "epoch": 0.20626539950721576, "grad_norm": 3.084268569946289, "learning_rate": 1.2890578347884846e-05, "loss": 0.8701, "step": 2930 }, { "epoch": 0.20633579725448786, "grad_norm": 2.3218142986297607, "learning_rate": 1.2889715914963762e-05, "loss": 0.8188, "step": 2931 }, { "epoch": 0.20640619500175994, "grad_norm": 2.604203224182129, "learning_rate": 1.2888853175831017e-05, "loss": 0.8558, "step": 2932 }, { "epoch": 0.20647659274903202, "grad_norm": 1.9492626190185547, "learning_rate": 1.288799013053146e-05, "loss": 0.686, "step": 2933 }, { "epoch": 0.20654699049630412, "grad_norm": 2.570934534072876, "learning_rate": 1.2887126779109965e-05, "loss": 0.771, "step": 2934 }, { "epoch": 0.2066173882435762, "grad_norm": 2.85846209526062, "learning_rate": 1.2886263121611416e-05, "loss": 0.8736, "step": 2935 }, { "epoch": 0.2066877859908483, "grad_norm": 3.319136619567871, "learning_rate": 1.2885399158080718e-05, "loss": 0.8952, "step": 2936 }, { "epoch": 0.20675818373812038, "grad_norm": 1.875622034072876, "learning_rate": 1.2884534888562786e-05, "loss": 0.6846, "step": 2937 }, { "epoch": 0.20682858148539246, "grad_norm": 2.0086350440979004, "learning_rate": 1.2883670313102557e-05, "loss": 0.7173, "step": 2938 }, { "epoch": 0.20689897923266456, "grad_norm": 1.9741716384887695, "learning_rate": 1.2882805431744979e-05, "loss": 0.83, "step": 2939 }, { "epoch": 0.20696937697993664, "grad_norm": 1.8565759658813477, "learning_rate": 1.288194024453502e-05, "loss": 0.728, "step": 2940 }, { "epoch": 0.20703977472720872, "grad_norm": 2.306332588195801, "learning_rate": 1.2881074751517658e-05, "loss": 0.6522, "step": 2941 }, { "epoch": 0.20711017247448082, "grad_norm": 2.625253200531006, "learning_rate": 1.2880208952737896e-05, "loss": 0.7052, "step": 2942 }, { "epoch": 0.2071805702217529, "grad_norm": 2.0825912952423096, "learning_rate": 1.2879342848240744e-05, "loss": 0.7516, "step": 2943 }, { "epoch": 0.20725096796902498, "grad_norm": 1.792840838432312, "learning_rate": 1.2878476438071232e-05, "loss": 0.7393, "step": 2944 }, { "epoch": 0.20732136571629708, "grad_norm": 4.916806221008301, "learning_rate": 1.2877609722274408e-05, "loss": 0.8064, "step": 2945 }, { "epoch": 0.20739176346356916, "grad_norm": 2.1589739322662354, "learning_rate": 1.287674270089533e-05, "loss": 0.7475, "step": 2946 }, { "epoch": 0.20746216121084124, "grad_norm": 3.2354280948638916, "learning_rate": 1.2875875373979076e-05, "loss": 0.6974, "step": 2947 }, { "epoch": 0.20753255895811334, "grad_norm": 2.1476645469665527, "learning_rate": 1.2875007741570744e-05, "loss": 0.7717, "step": 2948 }, { "epoch": 0.20760295670538542, "grad_norm": 4.201458930969238, "learning_rate": 1.2874139803715434e-05, "loss": 0.7375, "step": 2949 }, { "epoch": 0.20767335445265753, "grad_norm": 2.081859588623047, "learning_rate": 1.2873271560458277e-05, "loss": 0.8557, "step": 2950 }, { "epoch": 0.2077437521999296, "grad_norm": 1.77464759349823, "learning_rate": 1.2872403011844415e-05, "loss": 0.7297, "step": 2951 }, { "epoch": 0.20781414994720168, "grad_norm": 2.1298911571502686, "learning_rate": 1.2871534157918998e-05, "loss": 0.6886, "step": 2952 }, { "epoch": 0.2078845476944738, "grad_norm": 2.9709126949310303, "learning_rate": 1.2870664998727204e-05, "loss": 0.694, "step": 2953 }, { "epoch": 0.20795494544174586, "grad_norm": 2.084395170211792, "learning_rate": 1.2869795534314222e-05, "loss": 0.7137, "step": 2954 }, { "epoch": 0.20802534318901794, "grad_norm": 2.20050048828125, "learning_rate": 1.2868925764725251e-05, "loss": 0.7864, "step": 2955 }, { "epoch": 0.20809574093629005, "grad_norm": 2.1273438930511475, "learning_rate": 1.2868055690005517e-05, "loss": 0.7795, "step": 2956 }, { "epoch": 0.20816613868356212, "grad_norm": 1.9609476327896118, "learning_rate": 1.286718531020025e-05, "loss": 0.6847, "step": 2957 }, { "epoch": 0.2082365364308342, "grad_norm": 2.0419363975524902, "learning_rate": 1.286631462535471e-05, "loss": 0.651, "step": 2958 }, { "epoch": 0.2083069341781063, "grad_norm": 2.424896478652954, "learning_rate": 1.2865443635514155e-05, "loss": 0.7778, "step": 2959 }, { "epoch": 0.20837733192537838, "grad_norm": 2.2687911987304688, "learning_rate": 1.2864572340723876e-05, "loss": 0.7618, "step": 2960 }, { "epoch": 0.20844772967265046, "grad_norm": 1.8922141790390015, "learning_rate": 1.286370074102917e-05, "loss": 0.9435, "step": 2961 }, { "epoch": 0.20851812741992257, "grad_norm": 2.1771111488342285, "learning_rate": 1.286282883647535e-05, "loss": 0.7413, "step": 2962 }, { "epoch": 0.20858852516719464, "grad_norm": 6.954104423522949, "learning_rate": 1.286195662710775e-05, "loss": 0.776, "step": 2963 }, { "epoch": 0.20865892291446675, "grad_norm": 3.183263063430786, "learning_rate": 1.2861084112971717e-05, "loss": 0.8058, "step": 2964 }, { "epoch": 0.20872932066173883, "grad_norm": 3.5537304878234863, "learning_rate": 1.2860211294112612e-05, "loss": 0.8838, "step": 2965 }, { "epoch": 0.2087997184090109, "grad_norm": 2.1024482250213623, "learning_rate": 1.2859338170575815e-05, "loss": 0.7075, "step": 2966 }, { "epoch": 0.208870116156283, "grad_norm": 1.9769837856292725, "learning_rate": 1.285846474240672e-05, "loss": 0.8607, "step": 2967 }, { "epoch": 0.2089405139035551, "grad_norm": 1.9503802061080933, "learning_rate": 1.2857591009650735e-05, "loss": 0.7236, "step": 2968 }, { "epoch": 0.20901091165082716, "grad_norm": 2.055708646774292, "learning_rate": 1.2856716972353292e-05, "loss": 0.7197, "step": 2969 }, { "epoch": 0.20908130939809927, "grad_norm": 2.288564682006836, "learning_rate": 1.2855842630559827e-05, "loss": 0.5981, "step": 2970 }, { "epoch": 0.20915170714537135, "grad_norm": 2.139580011367798, "learning_rate": 1.2854967984315801e-05, "loss": 0.7552, "step": 2971 }, { "epoch": 0.20922210489264342, "grad_norm": 1.9567166566848755, "learning_rate": 1.2854093033666689e-05, "loss": 0.673, "step": 2972 }, { "epoch": 0.20929250263991553, "grad_norm": 2.0222623348236084, "learning_rate": 1.2853217778657975e-05, "loss": 0.7723, "step": 2973 }, { "epoch": 0.2093629003871876, "grad_norm": 2.0530974864959717, "learning_rate": 1.2852342219335169e-05, "loss": 0.6352, "step": 2974 }, { "epoch": 0.2094332981344597, "grad_norm": 2.242865800857544, "learning_rate": 1.2851466355743791e-05, "loss": 0.8141, "step": 2975 }, { "epoch": 0.2095036958817318, "grad_norm": 2.324664354324341, "learning_rate": 1.2850590187929377e-05, "loss": 0.7608, "step": 2976 }, { "epoch": 0.20957409362900387, "grad_norm": 2.3167903423309326, "learning_rate": 1.284971371593748e-05, "loss": 0.8451, "step": 2977 }, { "epoch": 0.20964449137627597, "grad_norm": 1.9584439992904663, "learning_rate": 1.2848836939813672e-05, "loss": 0.7223, "step": 2978 }, { "epoch": 0.20971488912354805, "grad_norm": 2.2730774879455566, "learning_rate": 1.2847959859603532e-05, "loss": 0.7603, "step": 2979 }, { "epoch": 0.20978528687082013, "grad_norm": 2.0540688037872314, "learning_rate": 1.2847082475352662e-05, "loss": 0.7106, "step": 2980 }, { "epoch": 0.20985568461809223, "grad_norm": 2.425849437713623, "learning_rate": 1.2846204787106681e-05, "loss": 0.7864, "step": 2981 }, { "epoch": 0.2099260823653643, "grad_norm": 1.8646453619003296, "learning_rate": 1.2845326794911217e-05, "loss": 0.7076, "step": 2982 }, { "epoch": 0.20999648011263639, "grad_norm": 2.402172565460205, "learning_rate": 1.2844448498811919e-05, "loss": 0.6891, "step": 2983 }, { "epoch": 0.2100668778599085, "grad_norm": 2.184095859527588, "learning_rate": 1.2843569898854452e-05, "loss": 0.7588, "step": 2984 }, { "epoch": 0.21013727560718057, "grad_norm": 2.4106931686401367, "learning_rate": 1.2842690995084495e-05, "loss": 0.8135, "step": 2985 }, { "epoch": 0.21020767335445265, "grad_norm": 2.2709105014801025, "learning_rate": 1.2841811787547737e-05, "loss": 0.7121, "step": 2986 }, { "epoch": 0.21027807110172475, "grad_norm": 2.3836894035339355, "learning_rate": 1.2840932276289898e-05, "loss": 0.6981, "step": 2987 }, { "epoch": 0.21034846884899683, "grad_norm": 2.4069457054138184, "learning_rate": 1.2840052461356698e-05, "loss": 0.7624, "step": 2988 }, { "epoch": 0.21041886659626893, "grad_norm": 2.672821521759033, "learning_rate": 1.2839172342793882e-05, "loss": 0.6154, "step": 2989 }, { "epoch": 0.210489264343541, "grad_norm": 2.0721442699432373, "learning_rate": 1.2838291920647206e-05, "loss": 0.6433, "step": 2990 }, { "epoch": 0.2105596620908131, "grad_norm": 2.4083735942840576, "learning_rate": 1.2837411194962448e-05, "loss": 0.7885, "step": 2991 }, { "epoch": 0.2106300598380852, "grad_norm": 2.296011209487915, "learning_rate": 1.2836530165785393e-05, "loss": 0.7022, "step": 2992 }, { "epoch": 0.21070045758535727, "grad_norm": 2.1844217777252197, "learning_rate": 1.283564883316185e-05, "loss": 0.8319, "step": 2993 }, { "epoch": 0.21077085533262935, "grad_norm": 1.962600588798523, "learning_rate": 1.283476719713764e-05, "loss": 0.7065, "step": 2994 }, { "epoch": 0.21084125307990145, "grad_norm": 2.6580471992492676, "learning_rate": 1.2833885257758597e-05, "loss": 0.7236, "step": 2995 }, { "epoch": 0.21091165082717353, "grad_norm": 1.9748520851135254, "learning_rate": 1.2833003015070578e-05, "loss": 0.8103, "step": 2996 }, { "epoch": 0.2109820485744456, "grad_norm": 2.0917856693267822, "learning_rate": 1.283212046911945e-05, "loss": 0.7377, "step": 2997 }, { "epoch": 0.2110524463217177, "grad_norm": 2.4745872020721436, "learning_rate": 1.2831237619951093e-05, "loss": 0.699, "step": 2998 }, { "epoch": 0.2111228440689898, "grad_norm": 2.1423213481903076, "learning_rate": 1.2830354467611416e-05, "loss": 0.7102, "step": 2999 }, { "epoch": 0.21119324181626187, "grad_norm": 2.5388002395629883, "learning_rate": 1.2829471012146328e-05, "loss": 0.9133, "step": 3000 }, { "epoch": 0.21126363956353397, "grad_norm": 2.128840446472168, "learning_rate": 1.2828587253601762e-05, "loss": 0.721, "step": 3001 }, { "epoch": 0.21133403731080605, "grad_norm": 2.150670289993286, "learning_rate": 1.2827703192023666e-05, "loss": 0.7463, "step": 3002 }, { "epoch": 0.21140443505807816, "grad_norm": 2.4538185596466064, "learning_rate": 1.2826818827458002e-05, "loss": 0.7868, "step": 3003 }, { "epoch": 0.21147483280535023, "grad_norm": 2.219212532043457, "learning_rate": 1.2825934159950752e-05, "loss": 0.6848, "step": 3004 }, { "epoch": 0.2115452305526223, "grad_norm": 2.7086071968078613, "learning_rate": 1.2825049189547909e-05, "loss": 0.7869, "step": 3005 }, { "epoch": 0.21161562829989441, "grad_norm": 2.2416110038757324, "learning_rate": 1.2824163916295482e-05, "loss": 0.7444, "step": 3006 }, { "epoch": 0.2116860260471665, "grad_norm": 2.158076524734497, "learning_rate": 1.2823278340239497e-05, "loss": 0.7952, "step": 3007 }, { "epoch": 0.21175642379443857, "grad_norm": 2.026205062866211, "learning_rate": 1.2822392461426002e-05, "loss": 0.7525, "step": 3008 }, { "epoch": 0.21182682154171067, "grad_norm": 2.1606574058532715, "learning_rate": 1.2821506279901045e-05, "loss": 0.8015, "step": 3009 }, { "epoch": 0.21189721928898275, "grad_norm": 2.3410604000091553, "learning_rate": 1.2820619795710705e-05, "loss": 0.7997, "step": 3010 }, { "epoch": 0.21196761703625483, "grad_norm": 2.24605131149292, "learning_rate": 1.2819733008901072e-05, "loss": 0.8502, "step": 3011 }, { "epoch": 0.21203801478352693, "grad_norm": 2.9325575828552246, "learning_rate": 1.2818845919518249e-05, "loss": 0.7483, "step": 3012 }, { "epoch": 0.212108412530799, "grad_norm": 2.3141210079193115, "learning_rate": 1.2817958527608358e-05, "loss": 0.7408, "step": 3013 }, { "epoch": 0.2121788102780711, "grad_norm": 2.1765236854553223, "learning_rate": 1.281707083321753e-05, "loss": 0.8486, "step": 3014 }, { "epoch": 0.2122492080253432, "grad_norm": 2.2700653076171875, "learning_rate": 1.2816182836391925e-05, "loss": 0.8677, "step": 3015 }, { "epoch": 0.21231960577261527, "grad_norm": 2.590378761291504, "learning_rate": 1.2815294537177707e-05, "loss": 0.6698, "step": 3016 }, { "epoch": 0.21239000351988738, "grad_norm": 2.1986477375030518, "learning_rate": 1.2814405935621056e-05, "loss": 0.6886, "step": 3017 }, { "epoch": 0.21246040126715945, "grad_norm": 2.399798631668091, "learning_rate": 1.2813517031768177e-05, "loss": 0.7428, "step": 3018 }, { "epoch": 0.21253079901443153, "grad_norm": 2.0437734127044678, "learning_rate": 1.2812627825665281e-05, "loss": 0.7163, "step": 3019 }, { "epoch": 0.21260119676170364, "grad_norm": 2.386971950531006, "learning_rate": 1.2811738317358602e-05, "loss": 0.8669, "step": 3020 }, { "epoch": 0.21267159450897571, "grad_norm": 2.5105223655700684, "learning_rate": 1.2810848506894384e-05, "loss": 0.6908, "step": 3021 }, { "epoch": 0.2127419922562478, "grad_norm": 2.2760958671569824, "learning_rate": 1.280995839431889e-05, "loss": 0.6872, "step": 3022 }, { "epoch": 0.2128123900035199, "grad_norm": 1.9517513513565063, "learning_rate": 1.2809067979678395e-05, "loss": 0.6715, "step": 3023 }, { "epoch": 0.21288278775079197, "grad_norm": 2.1396031379699707, "learning_rate": 1.2808177263019198e-05, "loss": 0.6835, "step": 3024 }, { "epoch": 0.21295318549806405, "grad_norm": 2.4746510982513428, "learning_rate": 1.2807286244387601e-05, "loss": 0.7441, "step": 3025 }, { "epoch": 0.21302358324533616, "grad_norm": 2.086435556411743, "learning_rate": 1.2806394923829935e-05, "loss": 0.8032, "step": 3026 }, { "epoch": 0.21309398099260823, "grad_norm": 2.039548873901367, "learning_rate": 1.2805503301392538e-05, "loss": 0.684, "step": 3027 }, { "epoch": 0.2131643787398803, "grad_norm": 2.899156093597412, "learning_rate": 1.2804611377121768e-05, "loss": 0.8287, "step": 3028 }, { "epoch": 0.21323477648715242, "grad_norm": 2.2719767093658447, "learning_rate": 1.2803719151063992e-05, "loss": 0.6684, "step": 3029 }, { "epoch": 0.2133051742344245, "grad_norm": 2.26334810256958, "learning_rate": 1.2802826623265606e-05, "loss": 0.854, "step": 3030 }, { "epoch": 0.2133755719816966, "grad_norm": 2.1905629634857178, "learning_rate": 1.2801933793773003e-05, "loss": 0.8057, "step": 3031 }, { "epoch": 0.21344596972896868, "grad_norm": 2.252495765686035, "learning_rate": 1.2801040662632612e-05, "loss": 0.7793, "step": 3032 }, { "epoch": 0.21351636747624075, "grad_norm": 2.382873296737671, "learning_rate": 1.2800147229890862e-05, "loss": 0.7388, "step": 3033 }, { "epoch": 0.21358676522351286, "grad_norm": 1.9096964597702026, "learning_rate": 1.2799253495594205e-05, "loss": 0.8481, "step": 3034 }, { "epoch": 0.21365716297078494, "grad_norm": 2.4100160598754883, "learning_rate": 1.2798359459789105e-05, "loss": 0.7391, "step": 3035 }, { "epoch": 0.213727560718057, "grad_norm": 2.2085776329040527, "learning_rate": 1.2797465122522044e-05, "loss": 0.874, "step": 3036 }, { "epoch": 0.21379795846532912, "grad_norm": 2.383165121078491, "learning_rate": 1.2796570483839524e-05, "loss": 0.7301, "step": 3037 }, { "epoch": 0.2138683562126012, "grad_norm": 2.4955389499664307, "learning_rate": 1.2795675543788054e-05, "loss": 0.7733, "step": 3038 }, { "epoch": 0.21393875395987327, "grad_norm": 2.8889360427856445, "learning_rate": 1.2794780302414164e-05, "loss": 0.7172, "step": 3039 }, { "epoch": 0.21400915170714538, "grad_norm": 2.7161431312561035, "learning_rate": 1.2793884759764397e-05, "loss": 0.7995, "step": 3040 }, { "epoch": 0.21407954945441746, "grad_norm": 1.8693368434906006, "learning_rate": 1.2792988915885316e-05, "loss": 0.743, "step": 3041 }, { "epoch": 0.21414994720168953, "grad_norm": 2.4464199542999268, "learning_rate": 1.2792092770823493e-05, "loss": 0.7401, "step": 3042 }, { "epoch": 0.21422034494896164, "grad_norm": 2.345980405807495, "learning_rate": 1.279119632462552e-05, "loss": 0.7839, "step": 3043 }, { "epoch": 0.21429074269623372, "grad_norm": 2.561969518661499, "learning_rate": 1.2790299577338007e-05, "loss": 0.8201, "step": 3044 }, { "epoch": 0.21436114044350582, "grad_norm": 2.6459784507751465, "learning_rate": 1.2789402529007575e-05, "loss": 0.715, "step": 3045 }, { "epoch": 0.2144315381907779, "grad_norm": 2.432788372039795, "learning_rate": 1.2788505179680864e-05, "loss": 0.8173, "step": 3046 }, { "epoch": 0.21450193593804998, "grad_norm": 2.316436767578125, "learning_rate": 1.2787607529404523e-05, "loss": 0.7594, "step": 3047 }, { "epoch": 0.21457233368532208, "grad_norm": 2.1638920307159424, "learning_rate": 1.2786709578225227e-05, "loss": 0.6862, "step": 3048 }, { "epoch": 0.21464273143259416, "grad_norm": 1.7246792316436768, "learning_rate": 1.2785811326189658e-05, "loss": 0.6713, "step": 3049 }, { "epoch": 0.21471312917986624, "grad_norm": 1.9893722534179688, "learning_rate": 1.278491277334452e-05, "loss": 0.7163, "step": 3050 }, { "epoch": 0.21478352692713834, "grad_norm": 2.329451322555542, "learning_rate": 1.2784013919736525e-05, "loss": 0.5757, "step": 3051 }, { "epoch": 0.21485392467441042, "grad_norm": 2.8759195804595947, "learning_rate": 1.278311476541241e-05, "loss": 0.8404, "step": 3052 }, { "epoch": 0.2149243224216825, "grad_norm": 2.401198625564575, "learning_rate": 1.2782215310418922e-05, "loss": 0.7956, "step": 3053 }, { "epoch": 0.2149947201689546, "grad_norm": 1.8831783533096313, "learning_rate": 1.2781315554802823e-05, "loss": 0.7821, "step": 3054 }, { "epoch": 0.21506511791622668, "grad_norm": 1.9924826622009277, "learning_rate": 1.278041549861089e-05, "loss": 0.7137, "step": 3055 }, { "epoch": 0.21513551566349876, "grad_norm": 2.1731925010681152, "learning_rate": 1.277951514188992e-05, "loss": 0.7111, "step": 3056 }, { "epoch": 0.21520591341077086, "grad_norm": 2.1799957752227783, "learning_rate": 1.2778614484686726e-05, "loss": 0.8122, "step": 3057 }, { "epoch": 0.21527631115804294, "grad_norm": 2.47405743598938, "learning_rate": 1.277771352704813e-05, "loss": 0.6836, "step": 3058 }, { "epoch": 0.21534670890531504, "grad_norm": 2.4810802936553955, "learning_rate": 1.2776812269020975e-05, "loss": 0.7044, "step": 3059 }, { "epoch": 0.21541710665258712, "grad_norm": 2.580700159072876, "learning_rate": 1.2775910710652121e-05, "loss": 0.8121, "step": 3060 }, { "epoch": 0.2154875043998592, "grad_norm": 1.9804786443710327, "learning_rate": 1.2775008851988436e-05, "loss": 0.6698, "step": 3061 }, { "epoch": 0.2155579021471313, "grad_norm": 2.9414753913879395, "learning_rate": 1.2774106693076811e-05, "loss": 0.7641, "step": 3062 }, { "epoch": 0.21562829989440338, "grad_norm": 2.3075196743011475, "learning_rate": 1.277320423396415e-05, "loss": 0.8309, "step": 3063 }, { "epoch": 0.21569869764167546, "grad_norm": 1.8802727460861206, "learning_rate": 1.2772301474697371e-05, "loss": 0.6091, "step": 3064 }, { "epoch": 0.21576909538894756, "grad_norm": 2.607598066329956, "learning_rate": 1.277139841532341e-05, "loss": 0.748, "step": 3065 }, { "epoch": 0.21583949313621964, "grad_norm": 2.0307865142822266, "learning_rate": 1.2770495055889221e-05, "loss": 0.7125, "step": 3066 }, { "epoch": 0.21590989088349172, "grad_norm": 1.8761448860168457, "learning_rate": 1.2769591396441768e-05, "loss": 0.7235, "step": 3067 }, { "epoch": 0.21598028863076382, "grad_norm": 2.5877604484558105, "learning_rate": 1.2768687437028031e-05, "loss": 0.7445, "step": 3068 }, { "epoch": 0.2160506863780359, "grad_norm": 2.6464695930480957, "learning_rate": 1.276778317769501e-05, "loss": 0.7662, "step": 3069 }, { "epoch": 0.21612108412530798, "grad_norm": 1.8829771280288696, "learning_rate": 1.276687861848972e-05, "loss": 0.8376, "step": 3070 }, { "epoch": 0.21619148187258008, "grad_norm": 2.020209312438965, "learning_rate": 1.2765973759459187e-05, "loss": 0.608, "step": 3071 }, { "epoch": 0.21626187961985216, "grad_norm": 2.0585291385650635, "learning_rate": 1.2765068600650457e-05, "loss": 0.7564, "step": 3072 }, { "epoch": 0.21633227736712426, "grad_norm": 2.3239593505859375, "learning_rate": 1.2764163142110586e-05, "loss": 0.7111, "step": 3073 }, { "epoch": 0.21640267511439634, "grad_norm": 1.9590197801589966, "learning_rate": 1.2763257383886658e-05, "loss": 0.7829, "step": 3074 }, { "epoch": 0.21647307286166842, "grad_norm": 2.2086665630340576, "learning_rate": 1.2762351326025754e-05, "loss": 0.7233, "step": 3075 }, { "epoch": 0.21654347060894052, "grad_norm": 2.1252975463867188, "learning_rate": 1.2761444968574989e-05, "loss": 0.7254, "step": 3076 }, { "epoch": 0.2166138683562126, "grad_norm": 2.4428725242614746, "learning_rate": 1.2760538311581481e-05, "loss": 0.8211, "step": 3077 }, { "epoch": 0.21668426610348468, "grad_norm": 1.7926791906356812, "learning_rate": 1.275963135509237e-05, "loss": 0.6179, "step": 3078 }, { "epoch": 0.21675466385075678, "grad_norm": 1.9777814149856567, "learning_rate": 1.2758724099154806e-05, "loss": 0.6541, "step": 3079 }, { "epoch": 0.21682506159802886, "grad_norm": 2.2217211723327637, "learning_rate": 1.2757816543815963e-05, "loss": 0.7521, "step": 3080 }, { "epoch": 0.21689545934530094, "grad_norm": 2.0575878620147705, "learning_rate": 1.2756908689123024e-05, "loss": 0.6927, "step": 3081 }, { "epoch": 0.21696585709257304, "grad_norm": 2.216799736022949, "learning_rate": 1.2756000535123188e-05, "loss": 0.773, "step": 3082 }, { "epoch": 0.21703625483984512, "grad_norm": 3.156691312789917, "learning_rate": 1.275509208186367e-05, "loss": 0.8228, "step": 3083 }, { "epoch": 0.2171066525871172, "grad_norm": 2.0203890800476074, "learning_rate": 1.2754183329391703e-05, "loss": 0.7314, "step": 3084 }, { "epoch": 0.2171770503343893, "grad_norm": 2.2178518772125244, "learning_rate": 1.2753274277754534e-05, "loss": 0.7761, "step": 3085 }, { "epoch": 0.21724744808166138, "grad_norm": 1.884958028793335, "learning_rate": 1.2752364926999424e-05, "loss": 0.7577, "step": 3086 }, { "epoch": 0.2173178458289335, "grad_norm": 2.198028087615967, "learning_rate": 1.2751455277173653e-05, "loss": 0.7901, "step": 3087 }, { "epoch": 0.21738824357620556, "grad_norm": 2.364086151123047, "learning_rate": 1.2750545328324513e-05, "loss": 0.8207, "step": 3088 }, { "epoch": 0.21745864132347764, "grad_norm": 3.1707804203033447, "learning_rate": 1.2749635080499312e-05, "loss": 0.7554, "step": 3089 }, { "epoch": 0.21752903907074975, "grad_norm": 1.94142484664917, "learning_rate": 1.2748724533745377e-05, "loss": 0.8703, "step": 3090 }, { "epoch": 0.21759943681802182, "grad_norm": 1.9685486555099487, "learning_rate": 1.2747813688110046e-05, "loss": 0.7325, "step": 3091 }, { "epoch": 0.2176698345652939, "grad_norm": 2.2313263416290283, "learning_rate": 1.2746902543640677e-05, "loss": 0.7515, "step": 3092 }, { "epoch": 0.217740232312566, "grad_norm": 1.861414909362793, "learning_rate": 1.2745991100384643e-05, "loss": 0.6922, "step": 3093 }, { "epoch": 0.21781063005983808, "grad_norm": 2.1390626430511475, "learning_rate": 1.2745079358389323e-05, "loss": 0.7437, "step": 3094 }, { "epoch": 0.21788102780711016, "grad_norm": 2.648023843765259, "learning_rate": 1.2744167317702125e-05, "loss": 0.7662, "step": 3095 }, { "epoch": 0.21795142555438227, "grad_norm": 2.350801944732666, "learning_rate": 1.2743254978370467e-05, "loss": 0.6375, "step": 3096 }, { "epoch": 0.21802182330165434, "grad_norm": 2.1566333770751953, "learning_rate": 1.2742342340441781e-05, "loss": 0.7851, "step": 3097 }, { "epoch": 0.21809222104892642, "grad_norm": 1.7837780714035034, "learning_rate": 1.2741429403963518e-05, "loss": 0.802, "step": 3098 }, { "epoch": 0.21816261879619853, "grad_norm": 2.4457571506500244, "learning_rate": 1.2740516168983139e-05, "loss": 0.7221, "step": 3099 }, { "epoch": 0.2182330165434706, "grad_norm": 1.9914522171020508, "learning_rate": 1.2739602635548122e-05, "loss": 0.7345, "step": 3100 }, { "epoch": 0.2183034142907427, "grad_norm": 1.9334121942520142, "learning_rate": 1.273868880370597e-05, "loss": 0.676, "step": 3101 }, { "epoch": 0.2183738120380148, "grad_norm": 2.285327434539795, "learning_rate": 1.273777467350419e-05, "loss": 0.7774, "step": 3102 }, { "epoch": 0.21844420978528686, "grad_norm": 1.9945783615112305, "learning_rate": 1.2736860244990306e-05, "loss": 0.7884, "step": 3103 }, { "epoch": 0.21851460753255897, "grad_norm": 2.0761477947235107, "learning_rate": 1.2735945518211865e-05, "loss": 0.7397, "step": 3104 }, { "epoch": 0.21858500527983105, "grad_norm": 2.0466208457946777, "learning_rate": 1.2735030493216419e-05, "loss": 0.7511, "step": 3105 }, { "epoch": 0.21865540302710312, "grad_norm": 1.900396704673767, "learning_rate": 1.2734115170051546e-05, "loss": 0.8102, "step": 3106 }, { "epoch": 0.21872580077437523, "grad_norm": 2.6602399349212646, "learning_rate": 1.2733199548764831e-05, "loss": 0.8003, "step": 3107 }, { "epoch": 0.2187961985216473, "grad_norm": 1.84638512134552, "learning_rate": 1.273228362940388e-05, "loss": 0.79, "step": 3108 }, { "epoch": 0.21886659626891938, "grad_norm": 1.7553513050079346, "learning_rate": 1.273136741201631e-05, "loss": 0.7535, "step": 3109 }, { "epoch": 0.2189369940161915, "grad_norm": 2.4688127040863037, "learning_rate": 1.273045089664976e-05, "loss": 0.6834, "step": 3110 }, { "epoch": 0.21900739176346357, "grad_norm": 1.801727056503296, "learning_rate": 1.2729534083351876e-05, "loss": 0.6554, "step": 3111 }, { "epoch": 0.21907778951073564, "grad_norm": 1.8008551597595215, "learning_rate": 1.2728616972170326e-05, "loss": 0.7554, "step": 3112 }, { "epoch": 0.21914818725800775, "grad_norm": 2.028029680252075, "learning_rate": 1.2727699563152793e-05, "loss": 0.7258, "step": 3113 }, { "epoch": 0.21921858500527983, "grad_norm": 2.0209741592407227, "learning_rate": 1.2726781856346972e-05, "loss": 0.6826, "step": 3114 }, { "epoch": 0.21928898275255193, "grad_norm": 2.0368385314941406, "learning_rate": 1.2725863851800576e-05, "loss": 0.7556, "step": 3115 }, { "epoch": 0.219359380499824, "grad_norm": 2.089179515838623, "learning_rate": 1.272494554956133e-05, "loss": 0.6193, "step": 3116 }, { "epoch": 0.21942977824709609, "grad_norm": 2.3366663455963135, "learning_rate": 1.2724026949676982e-05, "loss": 0.6894, "step": 3117 }, { "epoch": 0.2195001759943682, "grad_norm": 2.7887375354766846, "learning_rate": 1.272310805219529e-05, "loss": 0.7979, "step": 3118 }, { "epoch": 0.21957057374164027, "grad_norm": 2.0177929401397705, "learning_rate": 1.2722188857164026e-05, "loss": 0.7661, "step": 3119 }, { "epoch": 0.21964097148891235, "grad_norm": 1.9205858707427979, "learning_rate": 1.272126936463098e-05, "loss": 0.7748, "step": 3120 }, { "epoch": 0.21971136923618445, "grad_norm": 2.305665969848633, "learning_rate": 1.272034957464396e-05, "loss": 0.8255, "step": 3121 }, { "epoch": 0.21978176698345653, "grad_norm": 2.653742790222168, "learning_rate": 1.2719429487250787e-05, "loss": 0.7486, "step": 3122 }, { "epoch": 0.2198521647307286, "grad_norm": 2.1507763862609863, "learning_rate": 1.271850910249929e-05, "loss": 0.6984, "step": 3123 }, { "epoch": 0.2199225624780007, "grad_norm": 2.1635024547576904, "learning_rate": 1.271758842043733e-05, "loss": 0.6959, "step": 3124 }, { "epoch": 0.2199929602252728, "grad_norm": 2.470705986022949, "learning_rate": 1.2716667441112768e-05, "loss": 0.6935, "step": 3125 }, { "epoch": 0.22006335797254487, "grad_norm": 2.2289273738861084, "learning_rate": 1.2715746164573489e-05, "loss": 0.7259, "step": 3126 }, { "epoch": 0.22013375571981697, "grad_norm": 1.8323878049850464, "learning_rate": 1.271482459086739e-05, "loss": 0.6858, "step": 3127 }, { "epoch": 0.22020415346708905, "grad_norm": 2.127415180206299, "learning_rate": 1.2713902720042384e-05, "loss": 0.7815, "step": 3128 }, { "epoch": 0.22027455121436115, "grad_norm": 1.9415438175201416, "learning_rate": 1.2712980552146401e-05, "loss": 0.625, "step": 3129 }, { "epoch": 0.22034494896163323, "grad_norm": 2.0398402214050293, "learning_rate": 1.2712058087227387e-05, "loss": 0.8033, "step": 3130 }, { "epoch": 0.2204153467089053, "grad_norm": 2.242644786834717, "learning_rate": 1.2711135325333297e-05, "loss": 0.8079, "step": 3131 }, { "epoch": 0.2204857444561774, "grad_norm": 1.9508758783340454, "learning_rate": 1.2710212266512114e-05, "loss": 0.7634, "step": 3132 }, { "epoch": 0.2205561422034495, "grad_norm": 2.2474985122680664, "learning_rate": 1.2709288910811817e-05, "loss": 0.7733, "step": 3133 }, { "epoch": 0.22062653995072157, "grad_norm": 3.58011531829834, "learning_rate": 1.2708365258280423e-05, "loss": 0.6718, "step": 3134 }, { "epoch": 0.22069693769799367, "grad_norm": 1.788568377494812, "learning_rate": 1.270744130896595e-05, "loss": 0.7037, "step": 3135 }, { "epoch": 0.22076733544526575, "grad_norm": 2.20206618309021, "learning_rate": 1.2706517062916433e-05, "loss": 0.6943, "step": 3136 }, { "epoch": 0.22083773319253783, "grad_norm": 1.865045189857483, "learning_rate": 1.2705592520179927e-05, "loss": 0.6673, "step": 3137 }, { "epoch": 0.22090813093980993, "grad_norm": 2.0609347820281982, "learning_rate": 1.2704667680804497e-05, "loss": 0.7025, "step": 3138 }, { "epoch": 0.220978528687082, "grad_norm": 2.540104866027832, "learning_rate": 1.2703742544838227e-05, "loss": 0.6825, "step": 3139 }, { "epoch": 0.2210489264343541, "grad_norm": 2.545807361602783, "learning_rate": 1.2702817112329218e-05, "loss": 0.7387, "step": 3140 }, { "epoch": 0.2211193241816262, "grad_norm": 4.5565667152404785, "learning_rate": 1.2701891383325585e-05, "loss": 0.6923, "step": 3141 }, { "epoch": 0.22118972192889827, "grad_norm": 2.221820592880249, "learning_rate": 1.270096535787545e-05, "loss": 0.6842, "step": 3142 }, { "epoch": 0.22126011967617037, "grad_norm": 2.3755509853363037, "learning_rate": 1.2700039036026968e-05, "loss": 0.8101, "step": 3143 }, { "epoch": 0.22133051742344245, "grad_norm": 10.42531681060791, "learning_rate": 1.269911241782829e-05, "loss": 0.7035, "step": 3144 }, { "epoch": 0.22140091517071453, "grad_norm": 2.5192835330963135, "learning_rate": 1.26981855033276e-05, "loss": 0.851, "step": 3145 }, { "epoch": 0.22147131291798663, "grad_norm": 2.3696038722991943, "learning_rate": 1.269725829257308e-05, "loss": 0.7818, "step": 3146 }, { "epoch": 0.2215417106652587, "grad_norm": 2.1386964321136475, "learning_rate": 1.2696330785612946e-05, "loss": 0.8036, "step": 3147 }, { "epoch": 0.2216121084125308, "grad_norm": 2.114572763442993, "learning_rate": 1.2695402982495414e-05, "loss": 0.7017, "step": 3148 }, { "epoch": 0.2216825061598029, "grad_norm": 2.034846305847168, "learning_rate": 1.2694474883268723e-05, "loss": 0.7507, "step": 3149 }, { "epoch": 0.22175290390707497, "grad_norm": 1.9469513893127441, "learning_rate": 1.2693546487981126e-05, "loss": 0.7847, "step": 3150 }, { "epoch": 0.22182330165434705, "grad_norm": 1.8315056562423706, "learning_rate": 1.269261779668089e-05, "loss": 0.7024, "step": 3151 }, { "epoch": 0.22189369940161915, "grad_norm": 1.8663288354873657, "learning_rate": 1.2691688809416298e-05, "loss": 0.759, "step": 3152 }, { "epoch": 0.22196409714889123, "grad_norm": 1.9068424701690674, "learning_rate": 1.2690759526235653e-05, "loss": 0.7388, "step": 3153 }, { "epoch": 0.2220344948961633, "grad_norm": 1.9365549087524414, "learning_rate": 1.2689829947187264e-05, "loss": 0.703, "step": 3154 }, { "epoch": 0.22210489264343541, "grad_norm": 3.379399061203003, "learning_rate": 1.2688900072319464e-05, "loss": 0.7418, "step": 3155 }, { "epoch": 0.2221752903907075, "grad_norm": 2.224734306335449, "learning_rate": 1.2687969901680597e-05, "loss": 0.7811, "step": 3156 }, { "epoch": 0.2222456881379796, "grad_norm": 1.8050628900527954, "learning_rate": 1.2687039435319023e-05, "loss": 0.922, "step": 3157 }, { "epoch": 0.22231608588525167, "grad_norm": 2.086909770965576, "learning_rate": 1.268610867328312e-05, "loss": 0.7225, "step": 3158 }, { "epoch": 0.22238648363252375, "grad_norm": 1.6612790822982788, "learning_rate": 1.2685177615621277e-05, "loss": 0.7979, "step": 3159 }, { "epoch": 0.22245688137979586, "grad_norm": 2.134364366531372, "learning_rate": 1.26842462623819e-05, "loss": 0.7747, "step": 3160 }, { "epoch": 0.22252727912706793, "grad_norm": 2.1693716049194336, "learning_rate": 1.268331461361341e-05, "loss": 0.7404, "step": 3161 }, { "epoch": 0.22259767687434, "grad_norm": 2.37223219871521, "learning_rate": 1.2682382669364247e-05, "loss": 0.7126, "step": 3162 }, { "epoch": 0.22266807462161212, "grad_norm": 2.286916971206665, "learning_rate": 1.2681450429682867e-05, "loss": 0.7561, "step": 3163 }, { "epoch": 0.2227384723688842, "grad_norm": 1.9936481714248657, "learning_rate": 1.268051789461773e-05, "loss": 0.8466, "step": 3164 }, { "epoch": 0.22280887011615627, "grad_norm": 2.3903703689575195, "learning_rate": 1.2679585064217322e-05, "loss": 0.7624, "step": 3165 }, { "epoch": 0.22287926786342838, "grad_norm": 2.3252978324890137, "learning_rate": 1.2678651938530146e-05, "loss": 0.7923, "step": 3166 }, { "epoch": 0.22294966561070045, "grad_norm": 1.7455724477767944, "learning_rate": 1.267771851760471e-05, "loss": 0.7536, "step": 3167 }, { "epoch": 0.22302006335797253, "grad_norm": 1.9879755973815918, "learning_rate": 1.2676784801489547e-05, "loss": 0.7258, "step": 3168 }, { "epoch": 0.22309046110524464, "grad_norm": 2.0571601390838623, "learning_rate": 1.2675850790233203e-05, "loss": 0.8396, "step": 3169 }, { "epoch": 0.2231608588525167, "grad_norm": 1.9158333539962769, "learning_rate": 1.2674916483884234e-05, "loss": 0.789, "step": 3170 }, { "epoch": 0.22323125659978882, "grad_norm": 2.5489816665649414, "learning_rate": 1.267398188249122e-05, "loss": 0.8456, "step": 3171 }, { "epoch": 0.2233016543470609, "grad_norm": 2.0065648555755615, "learning_rate": 1.2673046986102746e-05, "loss": 0.7089, "step": 3172 }, { "epoch": 0.22337205209433297, "grad_norm": 2.149704694747925, "learning_rate": 1.2672111794767423e-05, "loss": 0.7155, "step": 3173 }, { "epoch": 0.22344244984160508, "grad_norm": 1.9714637994766235, "learning_rate": 1.2671176308533871e-05, "loss": 0.8088, "step": 3174 }, { "epoch": 0.22351284758887716, "grad_norm": 2.910207986831665, "learning_rate": 1.2670240527450725e-05, "loss": 0.7303, "step": 3175 }, { "epoch": 0.22358324533614923, "grad_norm": 1.7337642908096313, "learning_rate": 1.266930445156664e-05, "loss": 0.7715, "step": 3176 }, { "epoch": 0.22365364308342134, "grad_norm": 1.954297661781311, "learning_rate": 1.2668368080930283e-05, "loss": 0.7129, "step": 3177 }, { "epoch": 0.22372404083069342, "grad_norm": 1.7127834558486938, "learning_rate": 1.2667431415590334e-05, "loss": 0.6038, "step": 3178 }, { "epoch": 0.2237944385779655, "grad_norm": 2.1034939289093018, "learning_rate": 1.2666494455595493e-05, "loss": 0.7301, "step": 3179 }, { "epoch": 0.2238648363252376, "grad_norm": 2.2620768547058105, "learning_rate": 1.2665557200994474e-05, "loss": 0.8423, "step": 3180 }, { "epoch": 0.22393523407250968, "grad_norm": 2.529972553253174, "learning_rate": 1.2664619651836005e-05, "loss": 0.7669, "step": 3181 }, { "epoch": 0.22400563181978178, "grad_norm": 1.982102870941162, "learning_rate": 1.2663681808168833e-05, "loss": 0.8085, "step": 3182 }, { "epoch": 0.22407602956705386, "grad_norm": 1.9138230085372925, "learning_rate": 1.266274367004171e-05, "loss": 0.7864, "step": 3183 }, { "epoch": 0.22414642731432594, "grad_norm": 2.049367666244507, "learning_rate": 1.2661805237503417e-05, "loss": 0.7198, "step": 3184 }, { "epoch": 0.22421682506159804, "grad_norm": 2.417738199234009, "learning_rate": 1.2660866510602743e-05, "loss": 0.8573, "step": 3185 }, { "epoch": 0.22428722280887012, "grad_norm": 2.111278772354126, "learning_rate": 1.265992748938849e-05, "loss": 0.7382, "step": 3186 }, { "epoch": 0.2243576205561422, "grad_norm": 2.1961803436279297, "learning_rate": 1.2658988173909483e-05, "loss": 0.7495, "step": 3187 }, { "epoch": 0.2244280183034143, "grad_norm": 1.804175615310669, "learning_rate": 1.2658048564214556e-05, "loss": 0.7551, "step": 3188 }, { "epoch": 0.22449841605068638, "grad_norm": 2.9265296459198, "learning_rate": 1.2657108660352559e-05, "loss": 0.66, "step": 3189 }, { "epoch": 0.22456881379795846, "grad_norm": 1.7872343063354492, "learning_rate": 1.2656168462372358e-05, "loss": 0.7071, "step": 3190 }, { "epoch": 0.22463921154523056, "grad_norm": 1.8022059202194214, "learning_rate": 1.2655227970322839e-05, "loss": 0.81, "step": 3191 }, { "epoch": 0.22470960929250264, "grad_norm": 1.957194209098816, "learning_rate": 1.2654287184252895e-05, "loss": 0.7402, "step": 3192 }, { "epoch": 0.22478000703977472, "grad_norm": 2.19425106048584, "learning_rate": 1.2653346104211438e-05, "loss": 0.7503, "step": 3193 }, { "epoch": 0.22485040478704682, "grad_norm": 2.351741075515747, "learning_rate": 1.2652404730247397e-05, "loss": 0.7145, "step": 3194 }, { "epoch": 0.2249208025343189, "grad_norm": 2.1414263248443604, "learning_rate": 1.2651463062409719e-05, "loss": 0.7684, "step": 3195 }, { "epoch": 0.224991200281591, "grad_norm": 2.192631959915161, "learning_rate": 1.2650521100747353e-05, "loss": 0.6658, "step": 3196 }, { "epoch": 0.22506159802886308, "grad_norm": 2.1622676849365234, "learning_rate": 1.2649578845309279e-05, "loss": 0.6442, "step": 3197 }, { "epoch": 0.22513199577613516, "grad_norm": 2.0525078773498535, "learning_rate": 1.2648636296144487e-05, "loss": 0.6836, "step": 3198 }, { "epoch": 0.22520239352340726, "grad_norm": 2.308490753173828, "learning_rate": 1.2647693453301976e-05, "loss": 0.8107, "step": 3199 }, { "epoch": 0.22527279127067934, "grad_norm": 1.9956773519515991, "learning_rate": 1.2646750316830767e-05, "loss": 0.785, "step": 3200 }, { "epoch": 0.22534318901795142, "grad_norm": 2.0635197162628174, "learning_rate": 1.2645806886779895e-05, "loss": 0.7987, "step": 3201 }, { "epoch": 0.22541358676522352, "grad_norm": 2.1178529262542725, "learning_rate": 1.264486316319841e-05, "loss": 0.7817, "step": 3202 }, { "epoch": 0.2254839845124956, "grad_norm": 2.0787389278411865, "learning_rate": 1.264391914613538e-05, "loss": 0.7746, "step": 3203 }, { "epoch": 0.22555438225976768, "grad_norm": 2.2193291187286377, "learning_rate": 1.264297483563988e-05, "loss": 0.7616, "step": 3204 }, { "epoch": 0.22562478000703978, "grad_norm": 2.182554244995117, "learning_rate": 1.2642030231761007e-05, "loss": 0.6281, "step": 3205 }, { "epoch": 0.22569517775431186, "grad_norm": 2.066782236099243, "learning_rate": 1.2641085334547872e-05, "loss": 0.8186, "step": 3206 }, { "epoch": 0.22576557550158394, "grad_norm": 2.4216160774230957, "learning_rate": 1.2640140144049603e-05, "loss": 0.7059, "step": 3207 }, { "epoch": 0.22583597324885604, "grad_norm": 2.277552604675293, "learning_rate": 1.263919466031534e-05, "loss": 0.7024, "step": 3208 }, { "epoch": 0.22590637099612812, "grad_norm": 2.1183273792266846, "learning_rate": 1.2638248883394237e-05, "loss": 0.7392, "step": 3209 }, { "epoch": 0.22597676874340022, "grad_norm": 2.1569697856903076, "learning_rate": 1.2637302813335472e-05, "loss": 0.681, "step": 3210 }, { "epoch": 0.2260471664906723, "grad_norm": 2.119476318359375, "learning_rate": 1.2636356450188224e-05, "loss": 0.8602, "step": 3211 }, { "epoch": 0.22611756423794438, "grad_norm": 2.224773406982422, "learning_rate": 1.2635409794001704e-05, "loss": 0.6906, "step": 3212 }, { "epoch": 0.22618796198521648, "grad_norm": 2.2786455154418945, "learning_rate": 1.2634462844825122e-05, "loss": 0.6978, "step": 3213 }, { "epoch": 0.22625835973248856, "grad_norm": 1.9598420858383179, "learning_rate": 1.2633515602707715e-05, "loss": 0.736, "step": 3214 }, { "epoch": 0.22632875747976064, "grad_norm": 2.5283362865448, "learning_rate": 1.263256806769873e-05, "loss": 0.8667, "step": 3215 }, { "epoch": 0.22639915522703274, "grad_norm": 2.3065173625946045, "learning_rate": 1.263162023984743e-05, "loss": 0.6962, "step": 3216 }, { "epoch": 0.22646955297430482, "grad_norm": 2.328864336013794, "learning_rate": 1.2630672119203093e-05, "loss": 0.6879, "step": 3217 }, { "epoch": 0.2265399507215769, "grad_norm": 2.1773393154144287, "learning_rate": 1.2629723705815013e-05, "loss": 0.758, "step": 3218 }, { "epoch": 0.226610348468849, "grad_norm": 2.485766887664795, "learning_rate": 1.2628774999732499e-05, "loss": 0.8474, "step": 3219 }, { "epoch": 0.22668074621612108, "grad_norm": 2.210015058517456, "learning_rate": 1.2627826001004875e-05, "loss": 0.8623, "step": 3220 }, { "epoch": 0.22675114396339316, "grad_norm": 1.7394312620162964, "learning_rate": 1.262687670968148e-05, "loss": 0.6691, "step": 3221 }, { "epoch": 0.22682154171066526, "grad_norm": 1.9511582851409912, "learning_rate": 1.2625927125811672e-05, "loss": 0.6788, "step": 3222 }, { "epoch": 0.22689193945793734, "grad_norm": 2.749260663986206, "learning_rate": 1.2624977249444812e-05, "loss": 0.7989, "step": 3223 }, { "epoch": 0.22696233720520945, "grad_norm": 2.1582932472229004, "learning_rate": 1.2624027080630295e-05, "loss": 0.7314, "step": 3224 }, { "epoch": 0.22703273495248152, "grad_norm": 2.693284273147583, "learning_rate": 1.2623076619417516e-05, "loss": 0.8089, "step": 3225 }, { "epoch": 0.2271031326997536, "grad_norm": 2.0972797870635986, "learning_rate": 1.2622125865855889e-05, "loss": 0.6247, "step": 3226 }, { "epoch": 0.2271735304470257, "grad_norm": 1.7250609397888184, "learning_rate": 1.2621174819994847e-05, "loss": 0.781, "step": 3227 }, { "epoch": 0.22724392819429778, "grad_norm": 2.4797232151031494, "learning_rate": 1.2620223481883836e-05, "loss": 0.836, "step": 3228 }, { "epoch": 0.22731432594156986, "grad_norm": 1.7373640537261963, "learning_rate": 1.2619271851572316e-05, "loss": 0.7591, "step": 3229 }, { "epoch": 0.22738472368884197, "grad_norm": 1.588986873626709, "learning_rate": 1.2618319929109763e-05, "loss": 0.9471, "step": 3230 }, { "epoch": 0.22745512143611404, "grad_norm": 2.433804750442505, "learning_rate": 1.2617367714545671e-05, "loss": 0.7179, "step": 3231 }, { "epoch": 0.22752551918338612, "grad_norm": 2.173356533050537, "learning_rate": 1.2616415207929542e-05, "loss": 0.8176, "step": 3232 }, { "epoch": 0.22759591693065823, "grad_norm": 2.1565680503845215, "learning_rate": 1.26154624093109e-05, "loss": 0.6832, "step": 3233 }, { "epoch": 0.2276663146779303, "grad_norm": 1.8066478967666626, "learning_rate": 1.2614509318739281e-05, "loss": 0.7255, "step": 3234 }, { "epoch": 0.22773671242520238, "grad_norm": 2.3634228706359863, "learning_rate": 1.2613555936264239e-05, "loss": 0.8258, "step": 3235 }, { "epoch": 0.2278071101724745, "grad_norm": 1.8965363502502441, "learning_rate": 1.261260226193534e-05, "loss": 0.6775, "step": 3236 }, { "epoch": 0.22787750791974656, "grad_norm": 2.0862910747528076, "learning_rate": 1.2611648295802165e-05, "loss": 0.6308, "step": 3237 }, { "epoch": 0.22794790566701867, "grad_norm": 2.4328742027282715, "learning_rate": 1.2610694037914312e-05, "loss": 0.7868, "step": 3238 }, { "epoch": 0.22801830341429075, "grad_norm": 2.1927382946014404, "learning_rate": 1.26097394883214e-05, "loss": 0.7635, "step": 3239 }, { "epoch": 0.22808870116156282, "grad_norm": 2.309882402420044, "learning_rate": 1.2608784647073046e-05, "loss": 0.6652, "step": 3240 }, { "epoch": 0.22815909890883493, "grad_norm": 2.3755128383636475, "learning_rate": 1.26078295142189e-05, "loss": 0.696, "step": 3241 }, { "epoch": 0.228229496656107, "grad_norm": 2.011117696762085, "learning_rate": 1.260687408980862e-05, "loss": 0.7774, "step": 3242 }, { "epoch": 0.22829989440337908, "grad_norm": 1.9716317653656006, "learning_rate": 1.2605918373891876e-05, "loss": 0.7241, "step": 3243 }, { "epoch": 0.2283702921506512, "grad_norm": 2.136734962463379, "learning_rate": 1.2604962366518361e-05, "loss": 0.7392, "step": 3244 }, { "epoch": 0.22844068989792327, "grad_norm": 2.2782981395721436, "learning_rate": 1.2604006067737775e-05, "loss": 0.6244, "step": 3245 }, { "epoch": 0.22851108764519534, "grad_norm": 1.8229858875274658, "learning_rate": 1.260304947759984e-05, "loss": 0.7783, "step": 3246 }, { "epoch": 0.22858148539246745, "grad_norm": 2.9556033611297607, "learning_rate": 1.2602092596154286e-05, "loss": 0.7609, "step": 3247 }, { "epoch": 0.22865188313973953, "grad_norm": 2.0882041454315186, "learning_rate": 1.2601135423450865e-05, "loss": 0.8806, "step": 3248 }, { "epoch": 0.2287222808870116, "grad_norm": 1.9104455709457397, "learning_rate": 1.2600177959539342e-05, "loss": 0.7326, "step": 3249 }, { "epoch": 0.2287926786342837, "grad_norm": 1.987589955329895, "learning_rate": 1.2599220204469494e-05, "loss": 0.7701, "step": 3250 }, { "epoch": 0.22886307638155579, "grad_norm": 1.8129445314407349, "learning_rate": 1.2598262158291117e-05, "loss": 0.7092, "step": 3251 }, { "epoch": 0.2289334741288279, "grad_norm": 2.353976011276245, "learning_rate": 1.2597303821054019e-05, "loss": 0.6502, "step": 3252 }, { "epoch": 0.22900387187609997, "grad_norm": 1.659989356994629, "learning_rate": 1.2596345192808028e-05, "loss": 0.6402, "step": 3253 }, { "epoch": 0.22907426962337205, "grad_norm": 2.2390048503875732, "learning_rate": 1.259538627360298e-05, "loss": 0.7011, "step": 3254 }, { "epoch": 0.22914466737064415, "grad_norm": 2.111513376235962, "learning_rate": 1.2594427063488733e-05, "loss": 0.7345, "step": 3255 }, { "epoch": 0.22921506511791623, "grad_norm": 1.6954584121704102, "learning_rate": 1.2593467562515157e-05, "loss": 0.6646, "step": 3256 }, { "epoch": 0.2292854628651883, "grad_norm": 10.823022842407227, "learning_rate": 1.2592507770732136e-05, "loss": 0.7804, "step": 3257 }, { "epoch": 0.2293558606124604, "grad_norm": 1.9586788415908813, "learning_rate": 1.2591547688189572e-05, "loss": 0.7299, "step": 3258 }, { "epoch": 0.2294262583597325, "grad_norm": 2.0736536979675293, "learning_rate": 1.2590587314937379e-05, "loss": 0.8146, "step": 3259 }, { "epoch": 0.22949665610700457, "grad_norm": 3.873579740524292, "learning_rate": 1.258962665102549e-05, "loss": 0.7456, "step": 3260 }, { "epoch": 0.22956705385427667, "grad_norm": 2.1225738525390625, "learning_rate": 1.2588665696503847e-05, "loss": 0.7465, "step": 3261 }, { "epoch": 0.22963745160154875, "grad_norm": 2.3662734031677246, "learning_rate": 1.2587704451422414e-05, "loss": 0.78, "step": 3262 }, { "epoch": 0.22970784934882083, "grad_norm": 2.075134038925171, "learning_rate": 1.2586742915831166e-05, "loss": 0.624, "step": 3263 }, { "epoch": 0.22977824709609293, "grad_norm": 2.116715669631958, "learning_rate": 1.2585781089780092e-05, "loss": 0.6531, "step": 3264 }, { "epoch": 0.229848644843365, "grad_norm": 1.956315279006958, "learning_rate": 1.2584818973319203e-05, "loss": 0.7924, "step": 3265 }, { "epoch": 0.2299190425906371, "grad_norm": 2.15643572807312, "learning_rate": 1.2583856566498518e-05, "loss": 0.7468, "step": 3266 }, { "epoch": 0.2299894403379092, "grad_norm": 3.096966028213501, "learning_rate": 1.2582893869368071e-05, "loss": 0.7962, "step": 3267 }, { "epoch": 0.23005983808518127, "grad_norm": 2.2163333892822266, "learning_rate": 1.2581930881977918e-05, "loss": 0.7396, "step": 3268 }, { "epoch": 0.23013023583245337, "grad_norm": 3.69081449508667, "learning_rate": 1.258096760437812e-05, "loss": 0.7171, "step": 3269 }, { "epoch": 0.23020063357972545, "grad_norm": 2.072676658630371, "learning_rate": 1.2580004036618765e-05, "loss": 0.7301, "step": 3270 }, { "epoch": 0.23027103132699753, "grad_norm": 2.1513185501098633, "learning_rate": 1.2579040178749944e-05, "loss": 0.7662, "step": 3271 }, { "epoch": 0.23034142907426963, "grad_norm": 2.266205072402954, "learning_rate": 1.2578076030821772e-05, "loss": 0.6636, "step": 3272 }, { "epoch": 0.2304118268215417, "grad_norm": 3.4740803241729736, "learning_rate": 1.2577111592884378e-05, "loss": 0.8187, "step": 3273 }, { "epoch": 0.2304822245688138, "grad_norm": 2.072700023651123, "learning_rate": 1.2576146864987897e-05, "loss": 0.7506, "step": 3274 }, { "epoch": 0.2305526223160859, "grad_norm": 1.8920912742614746, "learning_rate": 1.2575181847182495e-05, "loss": 0.8067, "step": 3275 }, { "epoch": 0.23062302006335797, "grad_norm": 1.8544228076934814, "learning_rate": 1.2574216539518337e-05, "loss": 0.8246, "step": 3276 }, { "epoch": 0.23069341781063005, "grad_norm": 2.289433717727661, "learning_rate": 1.2573250942045612e-05, "loss": 0.7043, "step": 3277 }, { "epoch": 0.23076381555790215, "grad_norm": 1.971211552619934, "learning_rate": 1.2572285054814523e-05, "loss": 0.7838, "step": 3278 }, { "epoch": 0.23083421330517423, "grad_norm": 2.7419793605804443, "learning_rate": 1.257131887787529e-05, "loss": 0.787, "step": 3279 }, { "epoch": 0.23090461105244633, "grad_norm": 2.144624948501587, "learning_rate": 1.2570352411278141e-05, "loss": 0.6714, "step": 3280 }, { "epoch": 0.2309750087997184, "grad_norm": 2.1271135807037354, "learning_rate": 1.2569385655073326e-05, "loss": 0.6856, "step": 3281 }, { "epoch": 0.2310454065469905, "grad_norm": 2.156649589538574, "learning_rate": 1.2568418609311107e-05, "loss": 0.685, "step": 3282 }, { "epoch": 0.2311158042942626, "grad_norm": 2.0018012523651123, "learning_rate": 1.2567451274041761e-05, "loss": 0.7608, "step": 3283 }, { "epoch": 0.23118620204153467, "grad_norm": 2.4628849029541016, "learning_rate": 1.2566483649315582e-05, "loss": 0.7235, "step": 3284 }, { "epoch": 0.23125659978880675, "grad_norm": 2.0791099071502686, "learning_rate": 1.2565515735182876e-05, "loss": 0.7281, "step": 3285 }, { "epoch": 0.23132699753607885, "grad_norm": 2.190547466278076, "learning_rate": 1.2564547531693965e-05, "loss": 0.8616, "step": 3286 }, { "epoch": 0.23139739528335093, "grad_norm": 2.345242738723755, "learning_rate": 1.256357903889919e-05, "loss": 0.7768, "step": 3287 }, { "epoch": 0.231467793030623, "grad_norm": 1.7824294567108154, "learning_rate": 1.2562610256848902e-05, "loss": 0.6736, "step": 3288 }, { "epoch": 0.23153819077789511, "grad_norm": 2.132648229598999, "learning_rate": 1.256164118559347e-05, "loss": 0.7699, "step": 3289 }, { "epoch": 0.2316085885251672, "grad_norm": 2.1597702503204346, "learning_rate": 1.2560671825183276e-05, "loss": 0.7806, "step": 3290 }, { "epoch": 0.23167898627243927, "grad_norm": 2.6501755714416504, "learning_rate": 1.2559702175668714e-05, "loss": 0.6946, "step": 3291 }, { "epoch": 0.23174938401971137, "grad_norm": 1.8346039056777954, "learning_rate": 1.2558732237100204e-05, "loss": 0.726, "step": 3292 }, { "epoch": 0.23181978176698345, "grad_norm": 1.824737310409546, "learning_rate": 1.255776200952817e-05, "loss": 0.7255, "step": 3293 }, { "epoch": 0.23189017951425556, "grad_norm": 1.914135217666626, "learning_rate": 1.2556791493003055e-05, "loss": 0.7906, "step": 3294 }, { "epoch": 0.23196057726152763, "grad_norm": 2.1352429389953613, "learning_rate": 1.2555820687575318e-05, "loss": 0.7275, "step": 3295 }, { "epoch": 0.2320309750087997, "grad_norm": 2.0465381145477295, "learning_rate": 1.2554849593295432e-05, "loss": 0.8539, "step": 3296 }, { "epoch": 0.23210137275607182, "grad_norm": 2.1793861389160156, "learning_rate": 1.2553878210213886e-05, "loss": 0.8031, "step": 3297 }, { "epoch": 0.2321717705033439, "grad_norm": 1.9403294324874878, "learning_rate": 1.2552906538381181e-05, "loss": 0.6702, "step": 3298 }, { "epoch": 0.23224216825061597, "grad_norm": 1.9818615913391113, "learning_rate": 1.2551934577847836e-05, "loss": 0.7738, "step": 3299 }, { "epoch": 0.23231256599788808, "grad_norm": 1.8916300535202026, "learning_rate": 1.2550962328664384e-05, "loss": 0.7712, "step": 3300 }, { "epoch": 0.23238296374516015, "grad_norm": 2.0202574729919434, "learning_rate": 1.2549989790881371e-05, "loss": 0.6601, "step": 3301 }, { "epoch": 0.23245336149243223, "grad_norm": 2.407169818878174, "learning_rate": 1.2549016964549367e-05, "loss": 0.7829, "step": 3302 }, { "epoch": 0.23252375923970434, "grad_norm": 1.990968942642212, "learning_rate": 1.2548043849718943e-05, "loss": 0.691, "step": 3303 }, { "epoch": 0.2325941569869764, "grad_norm": 1.8612924814224243, "learning_rate": 1.2547070446440695e-05, "loss": 0.725, "step": 3304 }, { "epoch": 0.2326645547342485, "grad_norm": 2.2716197967529297, "learning_rate": 1.2546096754765232e-05, "loss": 0.8063, "step": 3305 }, { "epoch": 0.2327349524815206, "grad_norm": 1.826540231704712, "learning_rate": 1.2545122774743174e-05, "loss": 0.7283, "step": 3306 }, { "epoch": 0.23280535022879267, "grad_norm": 2.2173500061035156, "learning_rate": 1.2544148506425162e-05, "loss": 0.7555, "step": 3307 }, { "epoch": 0.23287574797606478, "grad_norm": 2.032494306564331, "learning_rate": 1.2543173949861847e-05, "loss": 0.6161, "step": 3308 }, { "epoch": 0.23294614572333686, "grad_norm": 2.063899040222168, "learning_rate": 1.25421991051039e-05, "loss": 0.6629, "step": 3309 }, { "epoch": 0.23301654347060893, "grad_norm": 1.859447717666626, "learning_rate": 1.2541223972202002e-05, "loss": 0.6877, "step": 3310 }, { "epoch": 0.23308694121788104, "grad_norm": 2.1296629905700684, "learning_rate": 1.2540248551206848e-05, "loss": 0.7755, "step": 3311 }, { "epoch": 0.23315733896515312, "grad_norm": 1.701793909072876, "learning_rate": 1.2539272842169157e-05, "loss": 0.7593, "step": 3312 }, { "epoch": 0.2332277367124252, "grad_norm": 2.098773956298828, "learning_rate": 1.2538296845139655e-05, "loss": 0.7793, "step": 3313 }, { "epoch": 0.2332981344596973, "grad_norm": 1.987886905670166, "learning_rate": 1.2537320560169082e-05, "loss": 0.7122, "step": 3314 }, { "epoch": 0.23336853220696938, "grad_norm": 1.7746961116790771, "learning_rate": 1.2536343987308202e-05, "loss": 0.8063, "step": 3315 }, { "epoch": 0.23343892995424145, "grad_norm": 2.022796630859375, "learning_rate": 1.253536712660778e-05, "loss": 0.8551, "step": 3316 }, { "epoch": 0.23350932770151356, "grad_norm": 2.137247323989868, "learning_rate": 1.2534389978118609e-05, "loss": 0.7208, "step": 3317 }, { "epoch": 0.23357972544878564, "grad_norm": 2.0972867012023926, "learning_rate": 1.253341254189149e-05, "loss": 0.7424, "step": 3318 }, { "epoch": 0.2336501231960577, "grad_norm": 1.7825546264648438, "learning_rate": 1.2532434817977242e-05, "loss": 0.6755, "step": 3319 }, { "epoch": 0.23372052094332982, "grad_norm": 1.8914794921875, "learning_rate": 1.25314568064267e-05, "loss": 0.7222, "step": 3320 }, { "epoch": 0.2337909186906019, "grad_norm": 2.1860299110412598, "learning_rate": 1.2530478507290708e-05, "loss": 0.762, "step": 3321 }, { "epoch": 0.233861316437874, "grad_norm": 1.8418399095535278, "learning_rate": 1.2529499920620127e-05, "loss": 0.6711, "step": 3322 }, { "epoch": 0.23393171418514608, "grad_norm": 2.1422972679138184, "learning_rate": 1.252852104646584e-05, "loss": 0.6227, "step": 3323 }, { "epoch": 0.23400211193241816, "grad_norm": 2.1758105754852295, "learning_rate": 1.2527541884878737e-05, "loss": 0.8405, "step": 3324 }, { "epoch": 0.23407250967969026, "grad_norm": 2.5638813972473145, "learning_rate": 1.2526562435909723e-05, "loss": 0.8173, "step": 3325 }, { "epoch": 0.23414290742696234, "grad_norm": 2.017564535140991, "learning_rate": 1.2525582699609724e-05, "loss": 0.757, "step": 3326 }, { "epoch": 0.23421330517423442, "grad_norm": 1.9185298681259155, "learning_rate": 1.2524602676029678e-05, "loss": 0.6624, "step": 3327 }, { "epoch": 0.23428370292150652, "grad_norm": 2.1272456645965576, "learning_rate": 1.2523622365220534e-05, "loss": 0.8177, "step": 3328 }, { "epoch": 0.2343541006687786, "grad_norm": 2.089553117752075, "learning_rate": 1.252264176723326e-05, "loss": 0.6694, "step": 3329 }, { "epoch": 0.23442449841605068, "grad_norm": 1.7995353937149048, "learning_rate": 1.252166088211884e-05, "loss": 0.7053, "step": 3330 }, { "epoch": 0.23449489616332278, "grad_norm": 2.19932222366333, "learning_rate": 1.252067970992827e-05, "loss": 0.7339, "step": 3331 }, { "epoch": 0.23456529391059486, "grad_norm": 1.7885493040084839, "learning_rate": 1.251969825071256e-05, "loss": 0.6741, "step": 3332 }, { "epoch": 0.23463569165786693, "grad_norm": 3.0731098651885986, "learning_rate": 1.251871650452274e-05, "loss": 0.7311, "step": 3333 }, { "epoch": 0.23470608940513904, "grad_norm": 1.8317965269088745, "learning_rate": 1.2517734471409853e-05, "loss": 0.7598, "step": 3334 }, { "epoch": 0.23477648715241112, "grad_norm": 1.9327473640441895, "learning_rate": 1.2516752151424953e-05, "loss": 0.8459, "step": 3335 }, { "epoch": 0.23484688489968322, "grad_norm": 2.6123898029327393, "learning_rate": 1.251576954461911e-05, "loss": 0.6313, "step": 3336 }, { "epoch": 0.2349172826469553, "grad_norm": 2.218064069747925, "learning_rate": 1.2514786651043416e-05, "loss": 0.7803, "step": 3337 }, { "epoch": 0.23498768039422738, "grad_norm": 2.0571038722991943, "learning_rate": 1.2513803470748967e-05, "loss": 0.8046, "step": 3338 }, { "epoch": 0.23505807814149948, "grad_norm": 2.0950565338134766, "learning_rate": 1.2512820003786884e-05, "loss": 0.7992, "step": 3339 }, { "epoch": 0.23512847588877156, "grad_norm": 1.7326592206954956, "learning_rate": 1.2511836250208296e-05, "loss": 0.6797, "step": 3340 }, { "epoch": 0.23519887363604364, "grad_norm": 1.8432408571243286, "learning_rate": 1.2510852210064348e-05, "loss": 0.7448, "step": 3341 }, { "epoch": 0.23526927138331574, "grad_norm": 2.3240628242492676, "learning_rate": 1.2509867883406204e-05, "loss": 0.8235, "step": 3342 }, { "epoch": 0.23533966913058782, "grad_norm": 1.771673560142517, "learning_rate": 1.2508883270285037e-05, "loss": 0.7017, "step": 3343 }, { "epoch": 0.2354100668778599, "grad_norm": 1.9256126880645752, "learning_rate": 1.250789837075204e-05, "loss": 0.7592, "step": 3344 }, { "epoch": 0.235480464625132, "grad_norm": 1.8597882986068726, "learning_rate": 1.2506913184858421e-05, "loss": 0.5901, "step": 3345 }, { "epoch": 0.23555086237240408, "grad_norm": 2.531994581222534, "learning_rate": 1.2505927712655397e-05, "loss": 0.6572, "step": 3346 }, { "epoch": 0.23562126011967616, "grad_norm": 2.130038022994995, "learning_rate": 1.2504941954194206e-05, "loss": 0.8292, "step": 3347 }, { "epoch": 0.23569165786694826, "grad_norm": 2.226653575897217, "learning_rate": 1.2503955909526096e-05, "loss": 0.6948, "step": 3348 }, { "epoch": 0.23576205561422034, "grad_norm": 1.9703153371810913, "learning_rate": 1.2502969578702331e-05, "loss": 0.7354, "step": 3349 }, { "epoch": 0.23583245336149244, "grad_norm": 4.345832824707031, "learning_rate": 1.2501982961774198e-05, "loss": 0.7714, "step": 3350 }, { "epoch": 0.23590285110876452, "grad_norm": 1.6267188787460327, "learning_rate": 1.2500996058792986e-05, "loss": 0.7185, "step": 3351 }, { "epoch": 0.2359732488560366, "grad_norm": 2.6693079471588135, "learning_rate": 1.2500008869810006e-05, "loss": 0.6926, "step": 3352 }, { "epoch": 0.2360436466033087, "grad_norm": 2.3278844356536865, "learning_rate": 1.2499021394876585e-05, "loss": 0.6899, "step": 3353 }, { "epoch": 0.23611404435058078, "grad_norm": 2.0176401138305664, "learning_rate": 1.249803363404406e-05, "loss": 0.7467, "step": 3354 }, { "epoch": 0.23618444209785286, "grad_norm": 2.1079938411712646, "learning_rate": 1.2497045587363788e-05, "loss": 0.7759, "step": 3355 }, { "epoch": 0.23625483984512496, "grad_norm": 2.172056198120117, "learning_rate": 1.2496057254887138e-05, "loss": 0.7632, "step": 3356 }, { "epoch": 0.23632523759239704, "grad_norm": 1.7789348363876343, "learning_rate": 1.2495068636665492e-05, "loss": 0.7316, "step": 3357 }, { "epoch": 0.23639563533966912, "grad_norm": 1.8637534379959106, "learning_rate": 1.2494079732750253e-05, "loss": 0.6626, "step": 3358 }, { "epoch": 0.23646603308694122, "grad_norm": 1.9918080568313599, "learning_rate": 1.2493090543192832e-05, "loss": 0.8268, "step": 3359 }, { "epoch": 0.2365364308342133, "grad_norm": 2.314735174179077, "learning_rate": 1.2492101068044662e-05, "loss": 0.6877, "step": 3360 }, { "epoch": 0.23660682858148538, "grad_norm": 2.111137628555298, "learning_rate": 1.2491111307357179e-05, "loss": 0.64, "step": 3361 }, { "epoch": 0.23667722632875748, "grad_norm": 1.7992448806762695, "learning_rate": 1.2490121261181847e-05, "loss": 0.7117, "step": 3362 }, { "epoch": 0.23674762407602956, "grad_norm": 2.022582530975342, "learning_rate": 1.248913092957014e-05, "loss": 0.7247, "step": 3363 }, { "epoch": 0.23681802182330167, "grad_norm": 1.6998398303985596, "learning_rate": 1.2488140312573544e-05, "loss": 0.8147, "step": 3364 }, { "epoch": 0.23688841957057374, "grad_norm": 2.8269848823547363, "learning_rate": 1.2487149410243562e-05, "loss": 0.6946, "step": 3365 }, { "epoch": 0.23695881731784582, "grad_norm": 2.0765278339385986, "learning_rate": 1.2486158222631715e-05, "loss": 0.7777, "step": 3366 }, { "epoch": 0.23702921506511793, "grad_norm": 2.3071203231811523, "learning_rate": 1.248516674978953e-05, "loss": 0.735, "step": 3367 }, { "epoch": 0.23709961281239, "grad_norm": 2.1637556552886963, "learning_rate": 1.248417499176856e-05, "loss": 0.7739, "step": 3368 }, { "epoch": 0.23717001055966208, "grad_norm": 1.837778925895691, "learning_rate": 1.2483182948620367e-05, "loss": 0.6515, "step": 3369 }, { "epoch": 0.2372404083069342, "grad_norm": 1.7996625900268555, "learning_rate": 1.2482190620396524e-05, "loss": 0.8223, "step": 3370 }, { "epoch": 0.23731080605420626, "grad_norm": 1.9711017608642578, "learning_rate": 1.2481198007148627e-05, "loss": 0.6749, "step": 3371 }, { "epoch": 0.23738120380147834, "grad_norm": 2.286323308944702, "learning_rate": 1.2480205108928281e-05, "loss": 0.7506, "step": 3372 }, { "epoch": 0.23745160154875045, "grad_norm": 2.15956974029541, "learning_rate": 1.2479211925787109e-05, "loss": 0.7751, "step": 3373 }, { "epoch": 0.23752199929602252, "grad_norm": 2.0714099407196045, "learning_rate": 1.2478218457776747e-05, "loss": 0.6823, "step": 3374 }, { "epoch": 0.2375923970432946, "grad_norm": 1.9976571798324585, "learning_rate": 1.2477224704948844e-05, "loss": 0.8358, "step": 3375 }, { "epoch": 0.2376627947905667, "grad_norm": 2.153984546661377, "learning_rate": 1.247623066735507e-05, "loss": 0.6814, "step": 3376 }, { "epoch": 0.23773319253783878, "grad_norm": 1.961560845375061, "learning_rate": 1.2475236345047103e-05, "loss": 0.6361, "step": 3377 }, { "epoch": 0.2378035902851109, "grad_norm": 2.1614062786102295, "learning_rate": 1.247424173807664e-05, "loss": 0.801, "step": 3378 }, { "epoch": 0.23787398803238297, "grad_norm": 2.3563649654388428, "learning_rate": 1.247324684649539e-05, "loss": 0.6984, "step": 3379 }, { "epoch": 0.23794438577965504, "grad_norm": 2.256253719329834, "learning_rate": 1.2472251670355082e-05, "loss": 0.6991, "step": 3380 }, { "epoch": 0.23801478352692715, "grad_norm": 2.187032699584961, "learning_rate": 1.2471256209707453e-05, "loss": 0.7896, "step": 3381 }, { "epoch": 0.23808518127419923, "grad_norm": 2.2996063232421875, "learning_rate": 1.2470260464604258e-05, "loss": 0.6079, "step": 3382 }, { "epoch": 0.2381555790214713, "grad_norm": 2.314194917678833, "learning_rate": 1.2469264435097268e-05, "loss": 0.7657, "step": 3383 }, { "epoch": 0.2382259767687434, "grad_norm": 1.9951741695404053, "learning_rate": 1.2468268121238265e-05, "loss": 0.7956, "step": 3384 }, { "epoch": 0.23829637451601549, "grad_norm": 2.2923176288604736, "learning_rate": 1.2467271523079053e-05, "loss": 0.8395, "step": 3385 }, { "epoch": 0.23836677226328756, "grad_norm": 1.6465331315994263, "learning_rate": 1.246627464067144e-05, "loss": 0.7065, "step": 3386 }, { "epoch": 0.23843717001055967, "grad_norm": 1.7634378671646118, "learning_rate": 1.246527747406726e-05, "loss": 0.7486, "step": 3387 }, { "epoch": 0.23850756775783175, "grad_norm": 2.0591142177581787, "learning_rate": 1.2464280023318353e-05, "loss": 0.7198, "step": 3388 }, { "epoch": 0.23857796550510382, "grad_norm": 2.067718267440796, "learning_rate": 1.246328228847658e-05, "loss": 0.775, "step": 3389 }, { "epoch": 0.23864836325237593, "grad_norm": 1.8368442058563232, "learning_rate": 1.2462284269593811e-05, "loss": 0.7609, "step": 3390 }, { "epoch": 0.238718760999648, "grad_norm": 1.6994582414627075, "learning_rate": 1.2461285966721938e-05, "loss": 0.7957, "step": 3391 }, { "epoch": 0.2387891587469201, "grad_norm": 1.8519991636276245, "learning_rate": 1.2460287379912862e-05, "loss": 0.7701, "step": 3392 }, { "epoch": 0.2388595564941922, "grad_norm": 2.106289863586426, "learning_rate": 1.24592885092185e-05, "loss": 0.7148, "step": 3393 }, { "epoch": 0.23892995424146427, "grad_norm": 1.9190682172775269, "learning_rate": 1.2458289354690782e-05, "loss": 0.9365, "step": 3394 }, { "epoch": 0.23900035198873637, "grad_norm": 1.5822502374649048, "learning_rate": 1.2457289916381659e-05, "loss": 0.6171, "step": 3395 }, { "epoch": 0.23907074973600845, "grad_norm": 1.811082124710083, "learning_rate": 1.2456290194343091e-05, "loss": 0.7025, "step": 3396 }, { "epoch": 0.23914114748328053, "grad_norm": 1.7411088943481445, "learning_rate": 1.2455290188627053e-05, "loss": 0.623, "step": 3397 }, { "epoch": 0.23921154523055263, "grad_norm": 1.8968454599380493, "learning_rate": 1.2454289899285536e-05, "loss": 0.7217, "step": 3398 }, { "epoch": 0.2392819429778247, "grad_norm": 2.3269407749176025, "learning_rate": 1.245328932637055e-05, "loss": 0.7893, "step": 3399 }, { "epoch": 0.23935234072509678, "grad_norm": 1.9997438192367554, "learning_rate": 1.2452288469934113e-05, "loss": 0.7956, "step": 3400 }, { "epoch": 0.2394227384723689, "grad_norm": 1.8075226545333862, "learning_rate": 1.2451287330028258e-05, "loss": 0.5885, "step": 3401 }, { "epoch": 0.23949313621964097, "grad_norm": 2.250140428543091, "learning_rate": 1.2450285906705042e-05, "loss": 0.732, "step": 3402 }, { "epoch": 0.23956353396691307, "grad_norm": 1.8022961616516113, "learning_rate": 1.2449284200016523e-05, "loss": 0.7618, "step": 3403 }, { "epoch": 0.23963393171418515, "grad_norm": 2.02912974357605, "learning_rate": 1.2448282210014784e-05, "loss": 0.6922, "step": 3404 }, { "epoch": 0.23970432946145723, "grad_norm": 1.8509401082992554, "learning_rate": 1.2447279936751918e-05, "loss": 0.8191, "step": 3405 }, { "epoch": 0.23977472720872933, "grad_norm": 2.245364189147949, "learning_rate": 1.2446277380280034e-05, "loss": 0.7237, "step": 3406 }, { "epoch": 0.2398451249560014, "grad_norm": 2.1921169757843018, "learning_rate": 1.2445274540651259e-05, "loss": 0.7648, "step": 3407 }, { "epoch": 0.2399155227032735, "grad_norm": 2.237760305404663, "learning_rate": 1.2444271417917726e-05, "loss": 0.6902, "step": 3408 }, { "epoch": 0.2399859204505456, "grad_norm": 1.842387080192566, "learning_rate": 1.2443268012131595e-05, "loss": 0.7868, "step": 3409 }, { "epoch": 0.24005631819781767, "grad_norm": 2.1130874156951904, "learning_rate": 1.2442264323345028e-05, "loss": 0.749, "step": 3410 }, { "epoch": 0.24012671594508975, "grad_norm": 2.07700514793396, "learning_rate": 1.2441260351610211e-05, "loss": 0.7423, "step": 3411 }, { "epoch": 0.24019711369236185, "grad_norm": 2.1439208984375, "learning_rate": 1.244025609697934e-05, "loss": 0.7474, "step": 3412 }, { "epoch": 0.24026751143963393, "grad_norm": 1.940063714981079, "learning_rate": 1.2439251559504626e-05, "loss": 0.6521, "step": 3413 }, { "epoch": 0.240337909186906, "grad_norm": 2.2455925941467285, "learning_rate": 1.2438246739238301e-05, "loss": 0.7462, "step": 3414 }, { "epoch": 0.2404083069341781, "grad_norm": 1.9169799089431763, "learning_rate": 1.2437241636232599e-05, "loss": 0.7471, "step": 3415 }, { "epoch": 0.2404787046814502, "grad_norm": 2.078136920928955, "learning_rate": 1.243623625053978e-05, "loss": 0.7627, "step": 3416 }, { "epoch": 0.2405491024287223, "grad_norm": 2.4264116287231445, "learning_rate": 1.2435230582212118e-05, "loss": 0.6697, "step": 3417 }, { "epoch": 0.24061950017599437, "grad_norm": 1.6818963289260864, "learning_rate": 1.2434224631301895e-05, "loss": 0.7181, "step": 3418 }, { "epoch": 0.24068989792326645, "grad_norm": 1.7889207601547241, "learning_rate": 1.243321839786141e-05, "loss": 0.6375, "step": 3419 }, { "epoch": 0.24076029567053855, "grad_norm": 1.8570665121078491, "learning_rate": 1.2432211881942981e-05, "loss": 0.8331, "step": 3420 }, { "epoch": 0.24083069341781063, "grad_norm": 1.9837764501571655, "learning_rate": 1.2431205083598937e-05, "loss": 0.6778, "step": 3421 }, { "epoch": 0.2409010911650827, "grad_norm": 1.9278299808502197, "learning_rate": 1.2430198002881623e-05, "loss": 0.6484, "step": 3422 }, { "epoch": 0.24097148891235481, "grad_norm": 2.2035725116729736, "learning_rate": 1.2429190639843395e-05, "loss": 0.5986, "step": 3423 }, { "epoch": 0.2410418866596269, "grad_norm": 2.2721638679504395, "learning_rate": 1.2428182994536628e-05, "loss": 0.7489, "step": 3424 }, { "epoch": 0.24111228440689897, "grad_norm": 1.8911609649658203, "learning_rate": 1.2427175067013711e-05, "loss": 0.7461, "step": 3425 }, { "epoch": 0.24118268215417107, "grad_norm": 2.3391382694244385, "learning_rate": 1.2426166857327049e-05, "loss": 0.7341, "step": 3426 }, { "epoch": 0.24125307990144315, "grad_norm": 2.112961530685425, "learning_rate": 1.2425158365529058e-05, "loss": 0.6729, "step": 3427 }, { "epoch": 0.24132347764871523, "grad_norm": 2.2887165546417236, "learning_rate": 1.2424149591672169e-05, "loss": 0.7506, "step": 3428 }, { "epoch": 0.24139387539598733, "grad_norm": 4.28475284576416, "learning_rate": 1.2423140535808832e-05, "loss": 0.7564, "step": 3429 }, { "epoch": 0.2414642731432594, "grad_norm": 2.1913299560546875, "learning_rate": 1.2422131197991506e-05, "loss": 0.7901, "step": 3430 }, { "epoch": 0.24153467089053152, "grad_norm": 1.9419962167739868, "learning_rate": 1.2421121578272669e-05, "loss": 0.8179, "step": 3431 }, { "epoch": 0.2416050686378036, "grad_norm": 2.0608956813812256, "learning_rate": 1.242011167670481e-05, "loss": 0.7502, "step": 3432 }, { "epoch": 0.24167546638507567, "grad_norm": 2.428006887435913, "learning_rate": 1.2419101493340436e-05, "loss": 0.8304, "step": 3433 }, { "epoch": 0.24174586413234778, "grad_norm": 2.0818750858306885, "learning_rate": 1.2418091028232068e-05, "loss": 0.7005, "step": 3434 }, { "epoch": 0.24181626187961985, "grad_norm": 2.038390874862671, "learning_rate": 1.2417080281432242e-05, "loss": 0.8602, "step": 3435 }, { "epoch": 0.24188665962689193, "grad_norm": 1.9287844896316528, "learning_rate": 1.2416069252993505e-05, "loss": 0.6616, "step": 3436 }, { "epoch": 0.24195705737416404, "grad_norm": 2.285423994064331, "learning_rate": 1.2415057942968422e-05, "loss": 0.709, "step": 3437 }, { "epoch": 0.2420274551214361, "grad_norm": 2.359217643737793, "learning_rate": 1.2414046351409574e-05, "loss": 0.7657, "step": 3438 }, { "epoch": 0.2420978528687082, "grad_norm": 1.937303066253662, "learning_rate": 1.241303447836955e-05, "loss": 0.7507, "step": 3439 }, { "epoch": 0.2421682506159803, "grad_norm": 1.98556387424469, "learning_rate": 1.2412022323900963e-05, "loss": 0.7647, "step": 3440 }, { "epoch": 0.24223864836325237, "grad_norm": 2.7347629070281982, "learning_rate": 1.2411009888056435e-05, "loss": 0.6382, "step": 3441 }, { "epoch": 0.24230904611052445, "grad_norm": 3.3726465702056885, "learning_rate": 1.2409997170888602e-05, "loss": 0.6731, "step": 3442 }, { "epoch": 0.24237944385779656, "grad_norm": 2.0269618034362793, "learning_rate": 1.2408984172450117e-05, "loss": 0.7415, "step": 3443 }, { "epoch": 0.24244984160506863, "grad_norm": 2.093360662460327, "learning_rate": 1.2407970892793643e-05, "loss": 0.6302, "step": 3444 }, { "epoch": 0.24252023935234074, "grad_norm": 1.8622691631317139, "learning_rate": 1.2406957331971867e-05, "loss": 0.7231, "step": 3445 }, { "epoch": 0.24259063709961282, "grad_norm": 1.496277093887329, "learning_rate": 1.2405943490037485e-05, "loss": 0.8204, "step": 3446 }, { "epoch": 0.2426610348468849, "grad_norm": 2.6820340156555176, "learning_rate": 1.2404929367043201e-05, "loss": 0.8047, "step": 3447 }, { "epoch": 0.242731432594157, "grad_norm": 2.156318426132202, "learning_rate": 1.2403914963041744e-05, "loss": 0.7501, "step": 3448 }, { "epoch": 0.24280183034142908, "grad_norm": 1.7875045537948608, "learning_rate": 1.2402900278085856e-05, "loss": 0.7269, "step": 3449 }, { "epoch": 0.24287222808870115, "grad_norm": 2.0446553230285645, "learning_rate": 1.240188531222829e-05, "loss": 0.7957, "step": 3450 }, { "epoch": 0.24294262583597326, "grad_norm": 2.034390449523926, "learning_rate": 1.2400870065521813e-05, "loss": 0.7847, "step": 3451 }, { "epoch": 0.24301302358324534, "grad_norm": 1.8579272031784058, "learning_rate": 1.239985453801921e-05, "loss": 0.7371, "step": 3452 }, { "epoch": 0.2430834213305174, "grad_norm": 2.498750925064087, "learning_rate": 1.239883872977328e-05, "loss": 0.7, "step": 3453 }, { "epoch": 0.24315381907778952, "grad_norm": 2.5268609523773193, "learning_rate": 1.2397822640836834e-05, "loss": 0.7005, "step": 3454 }, { "epoch": 0.2432242168250616, "grad_norm": 2.5435361862182617, "learning_rate": 1.23968062712627e-05, "loss": 0.8021, "step": 3455 }, { "epoch": 0.24329461457233367, "grad_norm": 1.9063462018966675, "learning_rate": 1.2395789621103721e-05, "loss": 0.6967, "step": 3456 }, { "epoch": 0.24336501231960578, "grad_norm": 1.87315034866333, "learning_rate": 1.2394772690412753e-05, "loss": 0.6642, "step": 3457 }, { "epoch": 0.24343541006687786, "grad_norm": 1.8124486207962036, "learning_rate": 1.2393755479242666e-05, "loss": 0.7005, "step": 3458 }, { "epoch": 0.24350580781414996, "grad_norm": 2.2874608039855957, "learning_rate": 1.239273798764635e-05, "loss": 0.751, "step": 3459 }, { "epoch": 0.24357620556142204, "grad_norm": 2.0943145751953125, "learning_rate": 1.2391720215676698e-05, "loss": 0.7088, "step": 3460 }, { "epoch": 0.24364660330869412, "grad_norm": 1.98458993434906, "learning_rate": 1.2390702163386632e-05, "loss": 0.7233, "step": 3461 }, { "epoch": 0.24371700105596622, "grad_norm": 2.516822338104248, "learning_rate": 1.2389683830829076e-05, "loss": 0.7617, "step": 3462 }, { "epoch": 0.2437873988032383, "grad_norm": 2.354823589324951, "learning_rate": 1.2388665218056979e-05, "loss": 0.847, "step": 3463 }, { "epoch": 0.24385779655051038, "grad_norm": 2.514420747756958, "learning_rate": 1.2387646325123294e-05, "loss": 0.7453, "step": 3464 }, { "epoch": 0.24392819429778248, "grad_norm": 2.3035061359405518, "learning_rate": 1.2386627152080998e-05, "loss": 0.8553, "step": 3465 }, { "epoch": 0.24399859204505456, "grad_norm": 2.1610493659973145, "learning_rate": 1.238560769898308e-05, "loss": 0.797, "step": 3466 }, { "epoch": 0.24406898979232663, "grad_norm": 2.1125359535217285, "learning_rate": 1.238458796588254e-05, "loss": 0.7552, "step": 3467 }, { "epoch": 0.24413938753959874, "grad_norm": 3.0067691802978516, "learning_rate": 1.2383567952832393e-05, "loss": 0.6446, "step": 3468 }, { "epoch": 0.24420978528687082, "grad_norm": 2.0382766723632812, "learning_rate": 1.2382547659885674e-05, "loss": 0.8705, "step": 3469 }, { "epoch": 0.2442801830341429, "grad_norm": 2.0456671714782715, "learning_rate": 1.2381527087095426e-05, "loss": 0.6852, "step": 3470 }, { "epoch": 0.244350580781415, "grad_norm": 2.405489206314087, "learning_rate": 1.2380506234514713e-05, "loss": 0.7191, "step": 3471 }, { "epoch": 0.24442097852868708, "grad_norm": 3.1922590732574463, "learning_rate": 1.2379485102196607e-05, "loss": 0.733, "step": 3472 }, { "epoch": 0.24449137627595918, "grad_norm": 2.1326560974121094, "learning_rate": 1.2378463690194198e-05, "loss": 0.7582, "step": 3473 }, { "epoch": 0.24456177402323126, "grad_norm": 1.9628627300262451, "learning_rate": 1.2377441998560592e-05, "loss": 0.7167, "step": 3474 }, { "epoch": 0.24463217177050334, "grad_norm": 1.7776806354522705, "learning_rate": 1.2376420027348905e-05, "loss": 0.662, "step": 3475 }, { "epoch": 0.24470256951777544, "grad_norm": 1.7138710021972656, "learning_rate": 1.2375397776612273e-05, "loss": 0.6973, "step": 3476 }, { "epoch": 0.24477296726504752, "grad_norm": 2.143932819366455, "learning_rate": 1.237437524640384e-05, "loss": 0.7298, "step": 3477 }, { "epoch": 0.2448433650123196, "grad_norm": 1.8266419172286987, "learning_rate": 1.2373352436776774e-05, "loss": 0.7959, "step": 3478 }, { "epoch": 0.2449137627595917, "grad_norm": 1.95771062374115, "learning_rate": 1.2372329347784247e-05, "loss": 0.6148, "step": 3479 }, { "epoch": 0.24498416050686378, "grad_norm": 2.6809232234954834, "learning_rate": 1.2371305979479452e-05, "loss": 0.6549, "step": 3480 }, { "epoch": 0.24505455825413586, "grad_norm": 2.029634714126587, "learning_rate": 1.2370282331915593e-05, "loss": 0.7339, "step": 3481 }, { "epoch": 0.24512495600140796, "grad_norm": 2.267069101333618, "learning_rate": 1.2369258405145893e-05, "loss": 0.8396, "step": 3482 }, { "epoch": 0.24519535374868004, "grad_norm": 2.128948450088501, "learning_rate": 1.2368234199223585e-05, "loss": 0.6439, "step": 3483 }, { "epoch": 0.24526575149595212, "grad_norm": 1.8612091541290283, "learning_rate": 1.236720971420192e-05, "loss": 0.7965, "step": 3484 }, { "epoch": 0.24533614924322422, "grad_norm": 1.9058741331100464, "learning_rate": 1.2366184950134162e-05, "loss": 0.6309, "step": 3485 }, { "epoch": 0.2454065469904963, "grad_norm": 1.8205245733261108, "learning_rate": 1.2365159907073586e-05, "loss": 0.6949, "step": 3486 }, { "epoch": 0.2454769447377684, "grad_norm": 1.7608234882354736, "learning_rate": 1.236413458507349e-05, "loss": 0.774, "step": 3487 }, { "epoch": 0.24554734248504048, "grad_norm": 1.8468517065048218, "learning_rate": 1.2363108984187179e-05, "loss": 0.773, "step": 3488 }, { "epoch": 0.24561774023231256, "grad_norm": 2.0087201595306396, "learning_rate": 1.2362083104467972e-05, "loss": 0.8329, "step": 3489 }, { "epoch": 0.24568813797958466, "grad_norm": 2.244976282119751, "learning_rate": 1.2361056945969212e-05, "loss": 0.7534, "step": 3490 }, { "epoch": 0.24575853572685674, "grad_norm": 1.7363563776016235, "learning_rate": 1.2360030508744243e-05, "loss": 0.7065, "step": 3491 }, { "epoch": 0.24582893347412882, "grad_norm": 2.536287546157837, "learning_rate": 1.2359003792846437e-05, "loss": 0.6913, "step": 3492 }, { "epoch": 0.24589933122140092, "grad_norm": 3.0269908905029297, "learning_rate": 1.2357976798329165e-05, "loss": 0.7713, "step": 3493 }, { "epoch": 0.245969728968673, "grad_norm": 1.8858305215835571, "learning_rate": 1.2356949525245831e-05, "loss": 0.6857, "step": 3494 }, { "epoch": 0.24604012671594508, "grad_norm": 1.8352850675582886, "learning_rate": 1.2355921973649838e-05, "loss": 0.6894, "step": 3495 }, { "epoch": 0.24611052446321718, "grad_norm": 2.4612677097320557, "learning_rate": 1.2354894143594612e-05, "loss": 0.8492, "step": 3496 }, { "epoch": 0.24618092221048926, "grad_norm": 2.1407828330993652, "learning_rate": 1.2353866035133589e-05, "loss": 0.8316, "step": 3497 }, { "epoch": 0.24625131995776134, "grad_norm": 1.9909467697143555, "learning_rate": 1.2352837648320222e-05, "loss": 0.691, "step": 3498 }, { "epoch": 0.24632171770503344, "grad_norm": 1.9915881156921387, "learning_rate": 1.2351808983207979e-05, "loss": 0.7787, "step": 3499 }, { "epoch": 0.24639211545230552, "grad_norm": 1.8780733346939087, "learning_rate": 1.235078003985034e-05, "loss": 0.6803, "step": 3500 }, { "epoch": 0.24646251319957763, "grad_norm": 2.093095302581787, "learning_rate": 1.2349750818300798e-05, "loss": 0.7795, "step": 3501 }, { "epoch": 0.2465329109468497, "grad_norm": 1.8787952661514282, "learning_rate": 1.234872131861287e-05, "loss": 0.8047, "step": 3502 }, { "epoch": 0.24660330869412178, "grad_norm": 2.5988423824310303, "learning_rate": 1.2347691540840073e-05, "loss": 0.7873, "step": 3503 }, { "epoch": 0.2466737064413939, "grad_norm": 1.933863878250122, "learning_rate": 1.2346661485035951e-05, "loss": 0.6685, "step": 3504 }, { "epoch": 0.24674410418866596, "grad_norm": 1.7685267925262451, "learning_rate": 1.2345631151254056e-05, "loss": 0.725, "step": 3505 }, { "epoch": 0.24681450193593804, "grad_norm": 2.110330820083618, "learning_rate": 1.2344600539547957e-05, "loss": 0.6366, "step": 3506 }, { "epoch": 0.24688489968321015, "grad_norm": 1.9746490716934204, "learning_rate": 1.2343569649971235e-05, "loss": 0.7082, "step": 3507 }, { "epoch": 0.24695529743048222, "grad_norm": 1.9742424488067627, "learning_rate": 1.2342538482577489e-05, "loss": 0.8545, "step": 3508 }, { "epoch": 0.2470256951777543, "grad_norm": 1.9399806261062622, "learning_rate": 1.2341507037420326e-05, "loss": 0.6771, "step": 3509 }, { "epoch": 0.2470960929250264, "grad_norm": 2.2715096473693848, "learning_rate": 1.2340475314553376e-05, "loss": 0.7259, "step": 3510 }, { "epoch": 0.24716649067229848, "grad_norm": 1.8838448524475098, "learning_rate": 1.2339443314030277e-05, "loss": 0.7285, "step": 3511 }, { "epoch": 0.24723688841957056, "grad_norm": 2.0903069972991943, "learning_rate": 1.2338411035904685e-05, "loss": 0.8914, "step": 3512 }, { "epoch": 0.24730728616684267, "grad_norm": 2.1358842849731445, "learning_rate": 1.2337378480230269e-05, "loss": 0.8083, "step": 3513 }, { "epoch": 0.24737768391411474, "grad_norm": 1.976032018661499, "learning_rate": 1.2336345647060714e-05, "loss": 0.6279, "step": 3514 }, { "epoch": 0.24744808166138685, "grad_norm": 1.8861616849899292, "learning_rate": 1.2335312536449711e-05, "loss": 0.7535, "step": 3515 }, { "epoch": 0.24751847940865893, "grad_norm": 2.2500710487365723, "learning_rate": 1.233427914845098e-05, "loss": 0.7514, "step": 3516 }, { "epoch": 0.247588877155931, "grad_norm": 1.8484090566635132, "learning_rate": 1.2333245483118246e-05, "loss": 0.6512, "step": 3517 }, { "epoch": 0.2476592749032031, "grad_norm": 1.7563858032226562, "learning_rate": 1.2332211540505248e-05, "loss": 0.737, "step": 3518 }, { "epoch": 0.24772967265047519, "grad_norm": 3.28251051902771, "learning_rate": 1.2331177320665742e-05, "loss": 0.7263, "step": 3519 }, { "epoch": 0.24780007039774726, "grad_norm": 1.5635442733764648, "learning_rate": 1.23301428236535e-05, "loss": 0.6404, "step": 3520 }, { "epoch": 0.24787046814501937, "grad_norm": 1.873259425163269, "learning_rate": 1.2329108049522307e-05, "loss": 0.8464, "step": 3521 }, { "epoch": 0.24794086589229145, "grad_norm": 2.1881422996520996, "learning_rate": 1.2328072998325957e-05, "loss": 0.7049, "step": 3522 }, { "epoch": 0.24801126363956352, "grad_norm": 1.8884719610214233, "learning_rate": 1.2327037670118268e-05, "loss": 0.7881, "step": 3523 }, { "epoch": 0.24808166138683563, "grad_norm": 2.238644599914551, "learning_rate": 1.2326002064953065e-05, "loss": 0.7523, "step": 3524 }, { "epoch": 0.2481520591341077, "grad_norm": 5.130863666534424, "learning_rate": 1.2324966182884193e-05, "loss": 0.7299, "step": 3525 }, { "epoch": 0.24822245688137978, "grad_norm": 1.9441323280334473, "learning_rate": 1.2323930023965506e-05, "loss": 0.674, "step": 3526 }, { "epoch": 0.2482928546286519, "grad_norm": 2.6326253414154053, "learning_rate": 1.2322893588250874e-05, "loss": 0.8528, "step": 3527 }, { "epoch": 0.24836325237592397, "grad_norm": 1.832019329071045, "learning_rate": 1.2321856875794187e-05, "loss": 0.6613, "step": 3528 }, { "epoch": 0.24843365012319607, "grad_norm": 2.0515711307525635, "learning_rate": 1.2320819886649338e-05, "loss": 0.6649, "step": 3529 }, { "epoch": 0.24850404787046815, "grad_norm": 1.9349201917648315, "learning_rate": 1.2319782620870245e-05, "loss": 0.839, "step": 3530 }, { "epoch": 0.24857444561774023, "grad_norm": 1.9017839431762695, "learning_rate": 1.2318745078510836e-05, "loss": 0.666, "step": 3531 }, { "epoch": 0.24864484336501233, "grad_norm": 1.8369090557098389, "learning_rate": 1.2317707259625053e-05, "loss": 0.7653, "step": 3532 }, { "epoch": 0.2487152411122844, "grad_norm": 1.8751025199890137, "learning_rate": 1.2316669164266855e-05, "loss": 0.7156, "step": 3533 }, { "epoch": 0.24878563885955648, "grad_norm": 4.131645202636719, "learning_rate": 1.231563079249021e-05, "loss": 0.8541, "step": 3534 }, { "epoch": 0.2488560366068286, "grad_norm": 2.667304515838623, "learning_rate": 1.2314592144349109e-05, "loss": 0.7617, "step": 3535 }, { "epoch": 0.24892643435410067, "grad_norm": 1.8570104837417603, "learning_rate": 1.2313553219897546e-05, "loss": 0.727, "step": 3536 }, { "epoch": 0.24899683210137274, "grad_norm": 1.9895799160003662, "learning_rate": 1.231251401918954e-05, "loss": 0.775, "step": 3537 }, { "epoch": 0.24906722984864485, "grad_norm": 1.762984275817871, "learning_rate": 1.2311474542279118e-05, "loss": 0.6555, "step": 3538 }, { "epoch": 0.24913762759591693, "grad_norm": 2.0270869731903076, "learning_rate": 1.2310434789220326e-05, "loss": 0.7039, "step": 3539 }, { "epoch": 0.249208025343189, "grad_norm": 2.320786237716675, "learning_rate": 1.230939476006722e-05, "loss": 0.7201, "step": 3540 }, { "epoch": 0.2492784230904611, "grad_norm": 2.0420985221862793, "learning_rate": 1.2308354454873869e-05, "loss": 0.7761, "step": 3541 }, { "epoch": 0.2493488208377332, "grad_norm": 1.854521632194519, "learning_rate": 1.2307313873694365e-05, "loss": 0.6621, "step": 3542 }, { "epoch": 0.2494192185850053, "grad_norm": 1.9656740427017212, "learning_rate": 1.2306273016582805e-05, "loss": 0.7318, "step": 3543 }, { "epoch": 0.24948961633227737, "grad_norm": 2.009167432785034, "learning_rate": 1.2305231883593307e-05, "loss": 0.7302, "step": 3544 }, { "epoch": 0.24956001407954945, "grad_norm": 1.6242207288742065, "learning_rate": 1.2304190474779996e-05, "loss": 0.571, "step": 3545 }, { "epoch": 0.24963041182682155, "grad_norm": 2.229170799255371, "learning_rate": 1.230314879019702e-05, "loss": 0.7516, "step": 3546 }, { "epoch": 0.24970080957409363, "grad_norm": 2.073471784591675, "learning_rate": 1.2302106829898536e-05, "loss": 0.8663, "step": 3547 }, { "epoch": 0.2497712073213657, "grad_norm": 2.1961145401000977, "learning_rate": 1.2301064593938716e-05, "loss": 0.6766, "step": 3548 }, { "epoch": 0.2498416050686378, "grad_norm": 2.161740303039551, "learning_rate": 1.2300022082371746e-05, "loss": 0.8533, "step": 3549 }, { "epoch": 0.2499120028159099, "grad_norm": 2.192936420440674, "learning_rate": 1.2298979295251827e-05, "loss": 0.6752, "step": 3550 }, { "epoch": 0.24998240056318197, "grad_norm": 2.0045647621154785, "learning_rate": 1.2297936232633177e-05, "loss": 0.7136, "step": 3551 }, { "epoch": 0.25005279831045407, "grad_norm": 1.9876997470855713, "learning_rate": 1.2296892894570024e-05, "loss": 0.7, "step": 3552 }, { "epoch": 0.25012319605772615, "grad_norm": 1.6963567733764648, "learning_rate": 1.2295849281116612e-05, "loss": 0.7297, "step": 3553 }, { "epoch": 0.2501935938049982, "grad_norm": 2.1989288330078125, "learning_rate": 1.22948053923272e-05, "loss": 0.7424, "step": 3554 }, { "epoch": 0.2502639915522703, "grad_norm": 2.2288973331451416, "learning_rate": 1.2293761228256061e-05, "loss": 0.816, "step": 3555 }, { "epoch": 0.25033438929954244, "grad_norm": 2.2370355129241943, "learning_rate": 1.2292716788957479e-05, "loss": 0.6995, "step": 3556 }, { "epoch": 0.2504047870468145, "grad_norm": 1.7778412103652954, "learning_rate": 1.229167207448576e-05, "loss": 0.7545, "step": 3557 }, { "epoch": 0.2504751847940866, "grad_norm": 1.6971001625061035, "learning_rate": 1.2290627084895218e-05, "loss": 0.7602, "step": 3558 }, { "epoch": 0.25054558254135867, "grad_norm": 1.8206727504730225, "learning_rate": 1.228958182024018e-05, "loss": 0.7818, "step": 3559 }, { "epoch": 0.25061598028863075, "grad_norm": 1.9237855672836304, "learning_rate": 1.2288536280574994e-05, "loss": 0.7468, "step": 3560 }, { "epoch": 0.2506863780359028, "grad_norm": 1.6379793882369995, "learning_rate": 1.2287490465954017e-05, "loss": 0.6263, "step": 3561 }, { "epoch": 0.25075677578317496, "grad_norm": 1.7998038530349731, "learning_rate": 1.2286444376431622e-05, "loss": 0.71, "step": 3562 }, { "epoch": 0.25082717353044703, "grad_norm": 2.292454242706299, "learning_rate": 1.2285398012062195e-05, "loss": 0.716, "step": 3563 }, { "epoch": 0.2508975712777191, "grad_norm": 1.8891268968582153, "learning_rate": 1.2284351372900141e-05, "loss": 0.6568, "step": 3564 }, { "epoch": 0.2509679690249912, "grad_norm": 1.9516745805740356, "learning_rate": 1.2283304458999874e-05, "loss": 0.7456, "step": 3565 }, { "epoch": 0.25103836677226327, "grad_norm": 1.8960424661636353, "learning_rate": 1.2282257270415819e-05, "loss": 0.8633, "step": 3566 }, { "epoch": 0.2511087645195354, "grad_norm": 2.001636505126953, "learning_rate": 1.2281209807202427e-05, "loss": 0.7063, "step": 3567 }, { "epoch": 0.2511791622668075, "grad_norm": 1.6523736715316772, "learning_rate": 1.2280162069414154e-05, "loss": 0.7351, "step": 3568 }, { "epoch": 0.25124956001407955, "grad_norm": 1.9434362649917603, "learning_rate": 1.2279114057105475e-05, "loss": 0.8072, "step": 3569 }, { "epoch": 0.25131995776135163, "grad_norm": 1.8666815757751465, "learning_rate": 1.2278065770330873e-05, "loss": 0.8127, "step": 3570 }, { "epoch": 0.2513903555086237, "grad_norm": 1.8874766826629639, "learning_rate": 1.2277017209144852e-05, "loss": 0.7102, "step": 3571 }, { "epoch": 0.2514607532558958, "grad_norm": 1.8481684923171997, "learning_rate": 1.2275968373601928e-05, "loss": 0.7358, "step": 3572 }, { "epoch": 0.2515311510031679, "grad_norm": 1.8213880062103271, "learning_rate": 1.2274919263756628e-05, "loss": 0.9867, "step": 3573 }, { "epoch": 0.25160154875044, "grad_norm": 2.21601939201355, "learning_rate": 1.22738698796635e-05, "loss": 0.6903, "step": 3574 }, { "epoch": 0.2516719464977121, "grad_norm": 1.723459005355835, "learning_rate": 1.2272820221377097e-05, "loss": 0.6755, "step": 3575 }, { "epoch": 0.25174234424498415, "grad_norm": 2.0670039653778076, "learning_rate": 1.2271770288952e-05, "loss": 0.7734, "step": 3576 }, { "epoch": 0.25181274199225623, "grad_norm": 2.2728521823883057, "learning_rate": 1.227072008244279e-05, "loss": 0.6976, "step": 3577 }, { "epoch": 0.25188313973952836, "grad_norm": 1.9877723455429077, "learning_rate": 1.226966960190407e-05, "loss": 0.6386, "step": 3578 }, { "epoch": 0.25195353748680044, "grad_norm": 2.120692014694214, "learning_rate": 1.2268618847390453e-05, "loss": 0.6996, "step": 3579 }, { "epoch": 0.2520239352340725, "grad_norm": 2.196427583694458, "learning_rate": 1.2267567818956572e-05, "loss": 0.6845, "step": 3580 }, { "epoch": 0.2520943329813446, "grad_norm": 2.0473389625549316, "learning_rate": 1.226651651665707e-05, "loss": 0.6834, "step": 3581 }, { "epoch": 0.25216473072861667, "grad_norm": 1.7952088117599487, "learning_rate": 1.2265464940546602e-05, "loss": 0.7703, "step": 3582 }, { "epoch": 0.25223512847588875, "grad_norm": 1.8261966705322266, "learning_rate": 1.2264413090679845e-05, "loss": 0.7298, "step": 3583 }, { "epoch": 0.2523055262231609, "grad_norm": 2.2234151363372803, "learning_rate": 1.2263360967111485e-05, "loss": 0.6328, "step": 3584 }, { "epoch": 0.25237592397043296, "grad_norm": 2.1191554069519043, "learning_rate": 1.2262308569896218e-05, "loss": 0.6018, "step": 3585 }, { "epoch": 0.25244632171770504, "grad_norm": 1.8102914094924927, "learning_rate": 1.2261255899088766e-05, "loss": 0.707, "step": 3586 }, { "epoch": 0.2525167194649771, "grad_norm": 2.1377384662628174, "learning_rate": 1.2260202954743851e-05, "loss": 0.7908, "step": 3587 }, { "epoch": 0.2525871172122492, "grad_norm": 2.1621434688568115, "learning_rate": 1.2259149736916225e-05, "loss": 0.6814, "step": 3588 }, { "epoch": 0.25265751495952127, "grad_norm": 1.8907899856567383, "learning_rate": 1.225809624566064e-05, "loss": 0.8035, "step": 3589 }, { "epoch": 0.2527279127067934, "grad_norm": 2.3993029594421387, "learning_rate": 1.2257042481031867e-05, "loss": 0.8514, "step": 3590 }, { "epoch": 0.2527983104540655, "grad_norm": 2.0951051712036133, "learning_rate": 1.2255988443084696e-05, "loss": 0.7055, "step": 3591 }, { "epoch": 0.25286870820133756, "grad_norm": 3.3863697052001953, "learning_rate": 1.2254934131873926e-05, "loss": 0.7268, "step": 3592 }, { "epoch": 0.25293910594860963, "grad_norm": 2.0205612182617188, "learning_rate": 1.225387954745437e-05, "loss": 0.7004, "step": 3593 }, { "epoch": 0.2530095036958817, "grad_norm": 2.128711223602295, "learning_rate": 1.2252824689880859e-05, "loss": 0.7595, "step": 3594 }, { "epoch": 0.25307990144315384, "grad_norm": 1.9378221035003662, "learning_rate": 1.2251769559208237e-05, "loss": 0.6972, "step": 3595 }, { "epoch": 0.2531502991904259, "grad_norm": 2.1935276985168457, "learning_rate": 1.2250714155491357e-05, "loss": 0.7147, "step": 3596 }, { "epoch": 0.253220696937698, "grad_norm": 1.9349498748779297, "learning_rate": 1.2249658478785093e-05, "loss": 0.716, "step": 3597 }, { "epoch": 0.2532910946849701, "grad_norm": 2.0137217044830322, "learning_rate": 1.224860252914433e-05, "loss": 0.6491, "step": 3598 }, { "epoch": 0.25336149243224215, "grad_norm": 1.7060962915420532, "learning_rate": 1.2247546306623969e-05, "loss": 0.7156, "step": 3599 }, { "epoch": 0.25343189017951423, "grad_norm": 1.9797686338424683, "learning_rate": 1.2246489811278923e-05, "loss": 0.7125, "step": 3600 }, { "epoch": 0.25350228792678636, "grad_norm": 1.9185479879379272, "learning_rate": 1.2245433043164118e-05, "loss": 0.6506, "step": 3601 }, { "epoch": 0.25357268567405844, "grad_norm": 2.1446027755737305, "learning_rate": 1.2244376002334501e-05, "loss": 0.8248, "step": 3602 }, { "epoch": 0.2536430834213305, "grad_norm": 2.1215415000915527, "learning_rate": 1.2243318688845027e-05, "loss": 0.7197, "step": 3603 }, { "epoch": 0.2537134811686026, "grad_norm": 2.0447115898132324, "learning_rate": 1.2242261102750663e-05, "loss": 0.9078, "step": 3604 }, { "epoch": 0.25378387891587467, "grad_norm": 2.075596809387207, "learning_rate": 1.2241203244106398e-05, "loss": 0.6857, "step": 3605 }, { "epoch": 0.2538542766631468, "grad_norm": 1.99501633644104, "learning_rate": 1.224014511296723e-05, "loss": 0.7965, "step": 3606 }, { "epoch": 0.2539246744104189, "grad_norm": 1.9944673776626587, "learning_rate": 1.223908670938817e-05, "loss": 0.757, "step": 3607 }, { "epoch": 0.25399507215769096, "grad_norm": 1.9942903518676758, "learning_rate": 1.2238028033424249e-05, "loss": 0.7548, "step": 3608 }, { "epoch": 0.25406546990496304, "grad_norm": 1.9089324474334717, "learning_rate": 1.2236969085130506e-05, "loss": 0.7555, "step": 3609 }, { "epoch": 0.2541358676522351, "grad_norm": 2.193957805633545, "learning_rate": 1.2235909864561998e-05, "loss": 0.7673, "step": 3610 }, { "epoch": 0.2542062653995072, "grad_norm": 1.6902503967285156, "learning_rate": 1.2234850371773793e-05, "loss": 0.8814, "step": 3611 }, { "epoch": 0.2542766631467793, "grad_norm": 1.7312393188476562, "learning_rate": 1.2233790606820979e-05, "loss": 0.7094, "step": 3612 }, { "epoch": 0.2543470608940514, "grad_norm": 1.8258726596832275, "learning_rate": 1.2232730569758648e-05, "loss": 0.758, "step": 3613 }, { "epoch": 0.2544174586413235, "grad_norm": 1.5053775310516357, "learning_rate": 1.2231670260641916e-05, "loss": 0.7025, "step": 3614 }, { "epoch": 0.25448785638859556, "grad_norm": 1.8192514181137085, "learning_rate": 1.223060967952591e-05, "loss": 0.8152, "step": 3615 }, { "epoch": 0.25455825413586763, "grad_norm": 2.1463520526885986, "learning_rate": 1.2229548826465771e-05, "loss": 0.8117, "step": 3616 }, { "epoch": 0.25462865188313977, "grad_norm": 2.010488748550415, "learning_rate": 1.222848770151665e-05, "loss": 0.7844, "step": 3617 }, { "epoch": 0.25469904963041184, "grad_norm": 2.025888681411743, "learning_rate": 1.222742630473372e-05, "loss": 0.8161, "step": 3618 }, { "epoch": 0.2547694473776839, "grad_norm": 1.766768455505371, "learning_rate": 1.222636463617216e-05, "loss": 0.7012, "step": 3619 }, { "epoch": 0.254839845124956, "grad_norm": 2.0290629863739014, "learning_rate": 1.2225302695887172e-05, "loss": 0.6478, "step": 3620 }, { "epoch": 0.2549102428722281, "grad_norm": 1.7602368593215942, "learning_rate": 1.2224240483933963e-05, "loss": 0.7158, "step": 3621 }, { "epoch": 0.25498064061950015, "grad_norm": 2.2058844566345215, "learning_rate": 1.2223178000367761e-05, "loss": 0.7203, "step": 3622 }, { "epoch": 0.2550510383667723, "grad_norm": 1.779801368713379, "learning_rate": 1.2222115245243801e-05, "loss": 0.7155, "step": 3623 }, { "epoch": 0.25512143611404436, "grad_norm": 1.961446762084961, "learning_rate": 1.2221052218617343e-05, "loss": 0.7905, "step": 3624 }, { "epoch": 0.25519183386131644, "grad_norm": 1.9131789207458496, "learning_rate": 1.2219988920543652e-05, "loss": 0.6447, "step": 3625 }, { "epoch": 0.2552622316085885, "grad_norm": 1.7837936878204346, "learning_rate": 1.2218925351078009e-05, "loss": 0.8335, "step": 3626 }, { "epoch": 0.2553326293558606, "grad_norm": 1.9121971130371094, "learning_rate": 1.2217861510275709e-05, "loss": 0.7024, "step": 3627 }, { "epoch": 0.2554030271031327, "grad_norm": 1.8676936626434326, "learning_rate": 1.2216797398192065e-05, "loss": 0.6705, "step": 3628 }, { "epoch": 0.2554734248504048, "grad_norm": 1.9657680988311768, "learning_rate": 1.2215733014882399e-05, "loss": 0.7734, "step": 3629 }, { "epoch": 0.2555438225976769, "grad_norm": 1.8178761005401611, "learning_rate": 1.221466836040205e-05, "loss": 0.6793, "step": 3630 }, { "epoch": 0.25561422034494896, "grad_norm": 2.0654585361480713, "learning_rate": 1.221360343480637e-05, "loss": 0.8428, "step": 3631 }, { "epoch": 0.25568461809222104, "grad_norm": 2.031270742416382, "learning_rate": 1.2212538238150727e-05, "loss": 0.7153, "step": 3632 }, { "epoch": 0.2557550158394931, "grad_norm": 2.1937756538391113, "learning_rate": 1.22114727704905e-05, "loss": 0.6758, "step": 3633 }, { "epoch": 0.25582541358676525, "grad_norm": 2.0769991874694824, "learning_rate": 1.2210407031881083e-05, "loss": 0.8937, "step": 3634 }, { "epoch": 0.2558958113340373, "grad_norm": 1.9003525972366333, "learning_rate": 1.2209341022377884e-05, "loss": 0.7548, "step": 3635 }, { "epoch": 0.2559662090813094, "grad_norm": 2.0467958450317383, "learning_rate": 1.220827474203633e-05, "loss": 0.8808, "step": 3636 }, { "epoch": 0.2560366068285815, "grad_norm": 1.9473772048950195, "learning_rate": 1.2207208190911856e-05, "loss": 0.7716, "step": 3637 }, { "epoch": 0.25610700457585356, "grad_norm": 1.8578901290893555, "learning_rate": 1.220614136905991e-05, "loss": 0.7106, "step": 3638 }, { "epoch": 0.25617740232312564, "grad_norm": 2.170498847961426, "learning_rate": 1.2205074276535959e-05, "loss": 0.707, "step": 3639 }, { "epoch": 0.25624780007039777, "grad_norm": 3.118760585784912, "learning_rate": 1.2204006913395485e-05, "loss": 0.6999, "step": 3640 }, { "epoch": 0.25631819781766985, "grad_norm": 2.064303398132324, "learning_rate": 1.2202939279693975e-05, "loss": 0.8251, "step": 3641 }, { "epoch": 0.2563885955649419, "grad_norm": 1.745065689086914, "learning_rate": 1.2201871375486944e-05, "loss": 0.7316, "step": 3642 }, { "epoch": 0.256458993312214, "grad_norm": 1.7765566110610962, "learning_rate": 1.2200803200829906e-05, "loss": 0.8455, "step": 3643 }, { "epoch": 0.2565293910594861, "grad_norm": 1.8464763164520264, "learning_rate": 1.2199734755778399e-05, "loss": 0.7077, "step": 3644 }, { "epoch": 0.2565997888067582, "grad_norm": 2.094757080078125, "learning_rate": 1.2198666040387977e-05, "loss": 0.8471, "step": 3645 }, { "epoch": 0.2566701865540303, "grad_norm": 1.9334124326705933, "learning_rate": 1.2197597054714195e-05, "loss": 0.7666, "step": 3646 }, { "epoch": 0.25674058430130237, "grad_norm": 2.005389451980591, "learning_rate": 1.2196527798812635e-05, "loss": 0.7991, "step": 3647 }, { "epoch": 0.25681098204857444, "grad_norm": 1.9663219451904297, "learning_rate": 1.2195458272738892e-05, "loss": 0.7957, "step": 3648 }, { "epoch": 0.2568813797958465, "grad_norm": 1.6510800123214722, "learning_rate": 1.2194388476548566e-05, "loss": 0.7383, "step": 3649 }, { "epoch": 0.2569517775431186, "grad_norm": 1.659825086593628, "learning_rate": 1.2193318410297278e-05, "loss": 0.691, "step": 3650 }, { "epoch": 0.25702217529039073, "grad_norm": 1.9018412828445435, "learning_rate": 1.2192248074040662e-05, "loss": 0.6862, "step": 3651 }, { "epoch": 0.2570925730376628, "grad_norm": 1.7882845401763916, "learning_rate": 1.2191177467834369e-05, "loss": 0.7374, "step": 3652 }, { "epoch": 0.2571629707849349, "grad_norm": 2.010503053665161, "learning_rate": 1.2190106591734056e-05, "loss": 0.7608, "step": 3653 }, { "epoch": 0.25723336853220696, "grad_norm": 2.0859057903289795, "learning_rate": 1.21890354457954e-05, "loss": 0.6963, "step": 3654 }, { "epoch": 0.25730376627947904, "grad_norm": 1.6234455108642578, "learning_rate": 1.2187964030074095e-05, "loss": 0.6757, "step": 3655 }, { "epoch": 0.2573741640267511, "grad_norm": 1.8284335136413574, "learning_rate": 1.218689234462584e-05, "loss": 0.7744, "step": 3656 }, { "epoch": 0.25744456177402325, "grad_norm": 1.8032524585723877, "learning_rate": 1.2185820389506355e-05, "loss": 0.7109, "step": 3657 }, { "epoch": 0.25751495952129533, "grad_norm": 2.4541232585906982, "learning_rate": 1.218474816477137e-05, "loss": 0.7604, "step": 3658 }, { "epoch": 0.2575853572685674, "grad_norm": 1.5970820188522339, "learning_rate": 1.2183675670476632e-05, "loss": 0.6709, "step": 3659 }, { "epoch": 0.2576557550158395, "grad_norm": 3.390453338623047, "learning_rate": 1.2182602906677904e-05, "loss": 0.7079, "step": 3660 }, { "epoch": 0.25772615276311156, "grad_norm": 2.0504367351531982, "learning_rate": 1.2181529873430954e-05, "loss": 0.8135, "step": 3661 }, { "epoch": 0.2577965505103837, "grad_norm": 1.7983391284942627, "learning_rate": 1.2180456570791575e-05, "loss": 0.763, "step": 3662 }, { "epoch": 0.25786694825765577, "grad_norm": 1.9128599166870117, "learning_rate": 1.2179382998815566e-05, "loss": 0.7664, "step": 3663 }, { "epoch": 0.25793734600492785, "grad_norm": 1.8708971738815308, "learning_rate": 1.2178309157558744e-05, "loss": 0.713, "step": 3664 }, { "epoch": 0.2580077437521999, "grad_norm": 1.947878360748291, "learning_rate": 1.2177235047076941e-05, "loss": 0.788, "step": 3665 }, { "epoch": 0.258078141499472, "grad_norm": 3.9084458351135254, "learning_rate": 1.2176160667426e-05, "loss": 0.7831, "step": 3666 }, { "epoch": 0.2581485392467441, "grad_norm": 2.0382204055786133, "learning_rate": 1.2175086018661777e-05, "loss": 0.6309, "step": 3667 }, { "epoch": 0.2582189369940162, "grad_norm": 2.050055980682373, "learning_rate": 1.2174011100840144e-05, "loss": 0.8068, "step": 3668 }, { "epoch": 0.2582893347412883, "grad_norm": 1.9449944496154785, "learning_rate": 1.2172935914016988e-05, "loss": 0.8862, "step": 3669 }, { "epoch": 0.25835973248856037, "grad_norm": 1.9053329229354858, "learning_rate": 1.217186045824821e-05, "loss": 0.7344, "step": 3670 }, { "epoch": 0.25843013023583244, "grad_norm": 2.67734956741333, "learning_rate": 1.2170784733589721e-05, "loss": 0.8082, "step": 3671 }, { "epoch": 0.2585005279831045, "grad_norm": 2.374748945236206, "learning_rate": 1.2169708740097453e-05, "loss": 0.7762, "step": 3672 }, { "epoch": 0.25857092573037666, "grad_norm": 1.810116171836853, "learning_rate": 1.2168632477827343e-05, "loss": 0.6859, "step": 3673 }, { "epoch": 0.25864132347764873, "grad_norm": 2.3427369594573975, "learning_rate": 1.2167555946835352e-05, "loss": 0.7924, "step": 3674 }, { "epoch": 0.2587117212249208, "grad_norm": 2.621683359146118, "learning_rate": 1.2166479147177447e-05, "loss": 0.7198, "step": 3675 }, { "epoch": 0.2587821189721929, "grad_norm": 2.1187007427215576, "learning_rate": 1.216540207890961e-05, "loss": 0.8507, "step": 3676 }, { "epoch": 0.25885251671946496, "grad_norm": 2.59725284576416, "learning_rate": 1.2164324742087844e-05, "loss": 0.7981, "step": 3677 }, { "epoch": 0.25892291446673704, "grad_norm": 1.448885202407837, "learning_rate": 1.2163247136768154e-05, "loss": 0.84, "step": 3678 }, { "epoch": 0.2589933122140092, "grad_norm": 1.931820034980774, "learning_rate": 1.2162169263006573e-05, "loss": 0.7667, "step": 3679 }, { "epoch": 0.25906370996128125, "grad_norm": 2.0883514881134033, "learning_rate": 1.2161091120859133e-05, "loss": 0.7178, "step": 3680 }, { "epoch": 0.25913410770855333, "grad_norm": 1.8008736371994019, "learning_rate": 1.2160012710381896e-05, "loss": 0.8101, "step": 3681 }, { "epoch": 0.2592045054558254, "grad_norm": 1.9086214303970337, "learning_rate": 1.215893403163092e-05, "loss": 0.6925, "step": 3682 }, { "epoch": 0.2592749032030975, "grad_norm": 1.7321678400039673, "learning_rate": 1.2157855084662294e-05, "loss": 0.6544, "step": 3683 }, { "epoch": 0.25934530095036956, "grad_norm": 2.0167829990386963, "learning_rate": 1.2156775869532113e-05, "loss": 0.7089, "step": 3684 }, { "epoch": 0.2594156986976417, "grad_norm": 1.9747283458709717, "learning_rate": 1.2155696386296482e-05, "loss": 0.6809, "step": 3685 }, { "epoch": 0.25948609644491377, "grad_norm": 1.6969846487045288, "learning_rate": 1.2154616635011526e-05, "loss": 0.6319, "step": 3686 }, { "epoch": 0.25955649419218585, "grad_norm": 1.6829713582992554, "learning_rate": 1.2153536615733384e-05, "loss": 0.8263, "step": 3687 }, { "epoch": 0.2596268919394579, "grad_norm": 1.9917230606079102, "learning_rate": 1.2152456328518207e-05, "loss": 0.7629, "step": 3688 }, { "epoch": 0.25969728968673, "grad_norm": 2.5854506492614746, "learning_rate": 1.2151375773422156e-05, "loss": 0.762, "step": 3689 }, { "epoch": 0.25976768743400214, "grad_norm": 2.1879148483276367, "learning_rate": 1.2150294950501415e-05, "loss": 0.732, "step": 3690 }, { "epoch": 0.2598380851812742, "grad_norm": 2.0332705974578857, "learning_rate": 1.2149213859812177e-05, "loss": 0.6572, "step": 3691 }, { "epoch": 0.2599084829285463, "grad_norm": 2.1148712635040283, "learning_rate": 1.2148132501410644e-05, "loss": 0.8495, "step": 3692 }, { "epoch": 0.25997888067581837, "grad_norm": 1.7611753940582275, "learning_rate": 1.214705087535304e-05, "loss": 0.7015, "step": 3693 }, { "epoch": 0.26004927842309045, "grad_norm": 1.8501012325286865, "learning_rate": 1.2145968981695602e-05, "loss": 0.6826, "step": 3694 }, { "epoch": 0.2601196761703625, "grad_norm": 2.083048105239868, "learning_rate": 1.2144886820494575e-05, "loss": 0.6461, "step": 3695 }, { "epoch": 0.26019007391763466, "grad_norm": 1.6914079189300537, "learning_rate": 1.2143804391806223e-05, "loss": 0.715, "step": 3696 }, { "epoch": 0.26026047166490673, "grad_norm": 2.032029390335083, "learning_rate": 1.2142721695686822e-05, "loss": 0.6681, "step": 3697 }, { "epoch": 0.2603308694121788, "grad_norm": 1.8269295692443848, "learning_rate": 1.2141638732192662e-05, "loss": 0.7979, "step": 3698 }, { "epoch": 0.2604012671594509, "grad_norm": 2.2394490242004395, "learning_rate": 1.2140555501380047e-05, "loss": 0.7328, "step": 3699 }, { "epoch": 0.26047166490672297, "grad_norm": 2.1288082599639893, "learning_rate": 1.2139472003305297e-05, "loss": 0.7716, "step": 3700 }, { "epoch": 0.2605420626539951, "grad_norm": 1.8778785467147827, "learning_rate": 1.2138388238024743e-05, "loss": 0.6622, "step": 3701 }, { "epoch": 0.2606124604012672, "grad_norm": 1.691753625869751, "learning_rate": 1.2137304205594731e-05, "loss": 0.7475, "step": 3702 }, { "epoch": 0.26068285814853925, "grad_norm": 1.926369309425354, "learning_rate": 1.2136219906071619e-05, "loss": 0.7806, "step": 3703 }, { "epoch": 0.26075325589581133, "grad_norm": 1.7346218824386597, "learning_rate": 1.2135135339511785e-05, "loss": 0.8122, "step": 3704 }, { "epoch": 0.2608236536430834, "grad_norm": 1.8292080163955688, "learning_rate": 1.2134050505971614e-05, "loss": 0.7029, "step": 3705 }, { "epoch": 0.2608940513903555, "grad_norm": 2.1288793087005615, "learning_rate": 1.2132965405507506e-05, "loss": 0.7025, "step": 3706 }, { "epoch": 0.2609644491376276, "grad_norm": 3.117194890975952, "learning_rate": 1.2131880038175878e-05, "loss": 0.6909, "step": 3707 }, { "epoch": 0.2610348468848997, "grad_norm": 2.2483811378479004, "learning_rate": 1.213079440403316e-05, "loss": 0.7074, "step": 3708 }, { "epoch": 0.2611052446321718, "grad_norm": 2.098398208618164, "learning_rate": 1.2129708503135794e-05, "loss": 0.7517, "step": 3709 }, { "epoch": 0.26117564237944385, "grad_norm": 1.899491310119629, "learning_rate": 1.2128622335540236e-05, "loss": 0.7624, "step": 3710 }, { "epoch": 0.26124604012671593, "grad_norm": 2.050945281982422, "learning_rate": 1.212753590130296e-05, "loss": 0.7106, "step": 3711 }, { "epoch": 0.261316437873988, "grad_norm": 2.3775253295898438, "learning_rate": 1.2126449200480446e-05, "loss": 0.6898, "step": 3712 }, { "epoch": 0.26138683562126014, "grad_norm": 1.7274373769760132, "learning_rate": 1.2125362233129197e-05, "loss": 0.7586, "step": 3713 }, { "epoch": 0.2614572333685322, "grad_norm": 1.8184096813201904, "learning_rate": 1.2124274999305724e-05, "loss": 0.6976, "step": 3714 }, { "epoch": 0.2615276311158043, "grad_norm": 1.883055567741394, "learning_rate": 1.2123187499066555e-05, "loss": 0.7893, "step": 3715 }, { "epoch": 0.26159802886307637, "grad_norm": 1.592800259590149, "learning_rate": 1.2122099732468222e-05, "loss": 0.6859, "step": 3716 }, { "epoch": 0.26166842661034845, "grad_norm": 2.1218338012695312, "learning_rate": 1.212101169956729e-05, "loss": 0.7998, "step": 3717 }, { "epoch": 0.2617388243576206, "grad_norm": 1.7491270303726196, "learning_rate": 1.2119923400420321e-05, "loss": 0.6695, "step": 3718 }, { "epoch": 0.26180922210489266, "grad_norm": 1.8466252088546753, "learning_rate": 1.2118834835083897e-05, "loss": 0.7948, "step": 3719 }, { "epoch": 0.26187961985216474, "grad_norm": 2.3268167972564697, "learning_rate": 1.2117746003614613e-05, "loss": 0.8033, "step": 3720 }, { "epoch": 0.2619500175994368, "grad_norm": 2.289970874786377, "learning_rate": 1.2116656906069082e-05, "loss": 0.8024, "step": 3721 }, { "epoch": 0.2620204153467089, "grad_norm": 1.7626911401748657, "learning_rate": 1.2115567542503923e-05, "loss": 0.6478, "step": 3722 }, { "epoch": 0.26209081309398097, "grad_norm": 1.5600993633270264, "learning_rate": 1.2114477912975774e-05, "loss": 0.7786, "step": 3723 }, { "epoch": 0.2621612108412531, "grad_norm": 2.1563470363616943, "learning_rate": 1.2113388017541287e-05, "loss": 0.7015, "step": 3724 }, { "epoch": 0.2622316085885252, "grad_norm": 1.7461328506469727, "learning_rate": 1.2112297856257127e-05, "loss": 0.7389, "step": 3725 }, { "epoch": 0.26230200633579726, "grad_norm": 2.337660312652588, "learning_rate": 1.211120742917997e-05, "loss": 0.7526, "step": 3726 }, { "epoch": 0.26237240408306933, "grad_norm": 2.4296700954437256, "learning_rate": 1.211011673636651e-05, "loss": 0.7214, "step": 3727 }, { "epoch": 0.2624428018303414, "grad_norm": 1.8229262828826904, "learning_rate": 1.2109025777873453e-05, "loss": 0.8164, "step": 3728 }, { "epoch": 0.26251319957761354, "grad_norm": 1.97089684009552, "learning_rate": 1.210793455375752e-05, "loss": 0.6732, "step": 3729 }, { "epoch": 0.2625835973248856, "grad_norm": 1.6465404033660889, "learning_rate": 1.2106843064075442e-05, "loss": 0.8022, "step": 3730 }, { "epoch": 0.2626539950721577, "grad_norm": 1.770046353340149, "learning_rate": 1.2105751308883969e-05, "loss": 0.7028, "step": 3731 }, { "epoch": 0.2627243928194298, "grad_norm": 1.7353830337524414, "learning_rate": 1.2104659288239861e-05, "loss": 0.7164, "step": 3732 }, { "epoch": 0.26279479056670185, "grad_norm": 1.826967477798462, "learning_rate": 1.2103567002199892e-05, "loss": 0.7115, "step": 3733 }, { "epoch": 0.26286518831397393, "grad_norm": 2.060905933380127, "learning_rate": 1.2102474450820854e-05, "loss": 0.7061, "step": 3734 }, { "epoch": 0.26293558606124606, "grad_norm": 1.6632068157196045, "learning_rate": 1.2101381634159548e-05, "loss": 0.7337, "step": 3735 }, { "epoch": 0.26300598380851814, "grad_norm": 1.8711063861846924, "learning_rate": 1.210028855227279e-05, "loss": 0.7993, "step": 3736 }, { "epoch": 0.2630763815557902, "grad_norm": 2.184187650680542, "learning_rate": 1.2099195205217413e-05, "loss": 0.6394, "step": 3737 }, { "epoch": 0.2631467793030623, "grad_norm": 1.815587043762207, "learning_rate": 1.2098101593050258e-05, "loss": 0.8144, "step": 3738 }, { "epoch": 0.26321717705033437, "grad_norm": 1.557524561882019, "learning_rate": 1.2097007715828184e-05, "loss": 0.6818, "step": 3739 }, { "epoch": 0.26328757479760645, "grad_norm": 1.8915354013442993, "learning_rate": 1.2095913573608065e-05, "loss": 0.7305, "step": 3740 }, { "epoch": 0.2633579725448786, "grad_norm": 2.67863392829895, "learning_rate": 1.209481916644678e-05, "loss": 0.7161, "step": 3741 }, { "epoch": 0.26342837029215066, "grad_norm": 2.025400400161743, "learning_rate": 1.2093724494401237e-05, "loss": 0.8226, "step": 3742 }, { "epoch": 0.26349876803942274, "grad_norm": 2.081714630126953, "learning_rate": 1.209262955752834e-05, "loss": 0.7552, "step": 3743 }, { "epoch": 0.2635691657866948, "grad_norm": 1.7477253675460815, "learning_rate": 1.2091534355885024e-05, "loss": 0.7559, "step": 3744 }, { "epoch": 0.2636395635339669, "grad_norm": 1.7836103439331055, "learning_rate": 1.2090438889528224e-05, "loss": 0.6195, "step": 3745 }, { "epoch": 0.263709961281239, "grad_norm": 1.6701231002807617, "learning_rate": 1.2089343158514895e-05, "loss": 0.8485, "step": 3746 }, { "epoch": 0.2637803590285111, "grad_norm": 5.664880275726318, "learning_rate": 1.2088247162902006e-05, "loss": 0.7252, "step": 3747 }, { "epoch": 0.2638507567757832, "grad_norm": 4.459561824798584, "learning_rate": 1.208715090274654e-05, "loss": 0.7174, "step": 3748 }, { "epoch": 0.26392115452305526, "grad_norm": 1.7916219234466553, "learning_rate": 1.208605437810549e-05, "loss": 0.7025, "step": 3749 }, { "epoch": 0.26399155227032733, "grad_norm": 2.096947431564331, "learning_rate": 1.2084957589035867e-05, "loss": 0.7419, "step": 3750 }, { "epoch": 0.2640619500175994, "grad_norm": 1.8616689443588257, "learning_rate": 1.2083860535594696e-05, "loss": 0.9309, "step": 3751 }, { "epoch": 0.26413234776487154, "grad_norm": 2.0282797813415527, "learning_rate": 1.2082763217839008e-05, "loss": 0.7448, "step": 3752 }, { "epoch": 0.2642027455121436, "grad_norm": 1.7964266538619995, "learning_rate": 1.208166563582586e-05, "loss": 0.7321, "step": 3753 }, { "epoch": 0.2642731432594157, "grad_norm": 2.179042100906372, "learning_rate": 1.2080567789612313e-05, "loss": 0.743, "step": 3754 }, { "epoch": 0.2643435410066878, "grad_norm": 1.8390053510665894, "learning_rate": 1.2079469679255445e-05, "loss": 0.9065, "step": 3755 }, { "epoch": 0.26441393875395985, "grad_norm": 2.004824161529541, "learning_rate": 1.2078371304812348e-05, "loss": 0.7413, "step": 3756 }, { "epoch": 0.264484336501232, "grad_norm": 1.9170900583267212, "learning_rate": 1.2077272666340127e-05, "loss": 0.6503, "step": 3757 }, { "epoch": 0.26455473424850406, "grad_norm": 1.6344085931777954, "learning_rate": 1.2076173763895902e-05, "loss": 0.6845, "step": 3758 }, { "epoch": 0.26462513199577614, "grad_norm": 1.9141815900802612, "learning_rate": 1.2075074597536807e-05, "loss": 0.8095, "step": 3759 }, { "epoch": 0.2646955297430482, "grad_norm": 1.8761615753173828, "learning_rate": 1.2073975167319986e-05, "loss": 0.7446, "step": 3760 }, { "epoch": 0.2647659274903203, "grad_norm": 2.0224928855895996, "learning_rate": 1.20728754733026e-05, "loss": 0.6873, "step": 3761 }, { "epoch": 0.2648363252375924, "grad_norm": 2.162325859069824, "learning_rate": 1.2071775515541826e-05, "loss": 0.7372, "step": 3762 }, { "epoch": 0.2649067229848645, "grad_norm": 1.736892819404602, "learning_rate": 1.207067529409485e-05, "loss": 0.763, "step": 3763 }, { "epoch": 0.2649771207321366, "grad_norm": 2.084728956222534, "learning_rate": 1.2069574809018872e-05, "loss": 0.6693, "step": 3764 }, { "epoch": 0.26504751847940866, "grad_norm": 1.7909811735153198, "learning_rate": 1.2068474060371106e-05, "loss": 0.7178, "step": 3765 }, { "epoch": 0.26511791622668074, "grad_norm": 1.9173625707626343, "learning_rate": 1.2067373048208787e-05, "loss": 0.7094, "step": 3766 }, { "epoch": 0.2651883139739528, "grad_norm": 1.7235119342803955, "learning_rate": 1.2066271772589152e-05, "loss": 0.7035, "step": 3767 }, { "epoch": 0.2652587117212249, "grad_norm": 1.7720882892608643, "learning_rate": 1.2065170233569461e-05, "loss": 0.8381, "step": 3768 }, { "epoch": 0.265329109468497, "grad_norm": 1.6778637170791626, "learning_rate": 1.2064068431206982e-05, "loss": 0.7823, "step": 3769 }, { "epoch": 0.2653995072157691, "grad_norm": 1.5917959213256836, "learning_rate": 1.2062966365558996e-05, "loss": 0.8267, "step": 3770 }, { "epoch": 0.2654699049630412, "grad_norm": 1.8786720037460327, "learning_rate": 1.2061864036682807e-05, "loss": 0.817, "step": 3771 }, { "epoch": 0.26554030271031326, "grad_norm": 1.8178073167800903, "learning_rate": 1.206076144463572e-05, "loss": 0.6555, "step": 3772 }, { "epoch": 0.26561070045758534, "grad_norm": 1.9280158281326294, "learning_rate": 1.2059658589475062e-05, "loss": 0.8477, "step": 3773 }, { "epoch": 0.26568109820485747, "grad_norm": 1.755266785621643, "learning_rate": 1.2058555471258175e-05, "loss": 0.7872, "step": 3774 }, { "epoch": 0.26575149595212955, "grad_norm": 1.8421982526779175, "learning_rate": 1.2057452090042404e-05, "loss": 0.6518, "step": 3775 }, { "epoch": 0.2658218936994016, "grad_norm": 2.5710620880126953, "learning_rate": 1.2056348445885121e-05, "loss": 0.8491, "step": 3776 }, { "epoch": 0.2658922914466737, "grad_norm": 2.037706136703491, "learning_rate": 1.2055244538843702e-05, "loss": 0.7278, "step": 3777 }, { "epoch": 0.2659626891939458, "grad_norm": 1.8966706991195679, "learning_rate": 1.205414036897554e-05, "loss": 0.6866, "step": 3778 }, { "epoch": 0.26603308694121786, "grad_norm": 1.9719735383987427, "learning_rate": 1.2053035936338045e-05, "loss": 0.7963, "step": 3779 }, { "epoch": 0.26610348468849, "grad_norm": 2.0502636432647705, "learning_rate": 1.2051931240988634e-05, "loss": 0.7256, "step": 3780 }, { "epoch": 0.26617388243576207, "grad_norm": 1.736865520477295, "learning_rate": 1.2050826282984745e-05, "loss": 0.6937, "step": 3781 }, { "epoch": 0.26624428018303414, "grad_norm": 1.6097546815872192, "learning_rate": 1.2049721062383819e-05, "loss": 0.7007, "step": 3782 }, { "epoch": 0.2663146779303062, "grad_norm": 1.9799835681915283, "learning_rate": 1.2048615579243325e-05, "loss": 0.6475, "step": 3783 }, { "epoch": 0.2663850756775783, "grad_norm": 1.600266933441162, "learning_rate": 1.2047509833620734e-05, "loss": 0.7057, "step": 3784 }, { "epoch": 0.26645547342485043, "grad_norm": 1.9862117767333984, "learning_rate": 1.2046403825573534e-05, "loss": 0.7491, "step": 3785 }, { "epoch": 0.2665258711721225, "grad_norm": 2.0891196727752686, "learning_rate": 1.204529755515923e-05, "loss": 0.6689, "step": 3786 }, { "epoch": 0.2665962689193946, "grad_norm": 2.0367112159729004, "learning_rate": 1.2044191022435339e-05, "loss": 0.7042, "step": 3787 }, { "epoch": 0.26666666666666666, "grad_norm": 1.9145265817642212, "learning_rate": 1.2043084227459383e-05, "loss": 0.7325, "step": 3788 }, { "epoch": 0.26673706441393874, "grad_norm": 1.747888207435608, "learning_rate": 1.2041977170288916e-05, "loss": 0.8184, "step": 3789 }, { "epoch": 0.2668074621612108, "grad_norm": 3.06093430519104, "learning_rate": 1.2040869850981487e-05, "loss": 0.8524, "step": 3790 }, { "epoch": 0.26687785990848295, "grad_norm": 2.0501463413238525, "learning_rate": 1.203976226959467e-05, "loss": 0.7387, "step": 3791 }, { "epoch": 0.26694825765575503, "grad_norm": 1.7529066801071167, "learning_rate": 1.2038654426186048e-05, "loss": 0.8285, "step": 3792 }, { "epoch": 0.2670186554030271, "grad_norm": 1.7912625074386597, "learning_rate": 1.203754632081322e-05, "loss": 0.8401, "step": 3793 }, { "epoch": 0.2670890531502992, "grad_norm": 2.4856936931610107, "learning_rate": 1.2036437953533793e-05, "loss": 0.8307, "step": 3794 }, { "epoch": 0.26715945089757126, "grad_norm": 1.8110512495040894, "learning_rate": 1.2035329324405398e-05, "loss": 0.7458, "step": 3795 }, { "epoch": 0.26722984864484334, "grad_norm": 1.9504203796386719, "learning_rate": 1.2034220433485674e-05, "loss": 0.7459, "step": 3796 }, { "epoch": 0.26730024639211547, "grad_norm": 2.204909563064575, "learning_rate": 1.2033111280832266e-05, "loss": 0.8255, "step": 3797 }, { "epoch": 0.26737064413938755, "grad_norm": 1.7151297330856323, "learning_rate": 1.2032001866502847e-05, "loss": 0.6887, "step": 3798 }, { "epoch": 0.2674410418866596, "grad_norm": 1.9220085144042969, "learning_rate": 1.2030892190555093e-05, "loss": 0.8765, "step": 3799 }, { "epoch": 0.2675114396339317, "grad_norm": 2.105177164077759, "learning_rate": 1.2029782253046696e-05, "loss": 0.6458, "step": 3800 }, { "epoch": 0.2675818373812038, "grad_norm": 1.7963594198226929, "learning_rate": 1.2028672054035368e-05, "loss": 0.6497, "step": 3801 }, { "epoch": 0.2676522351284759, "grad_norm": 2.2115871906280518, "learning_rate": 1.2027561593578824e-05, "loss": 0.8414, "step": 3802 }, { "epoch": 0.267722632875748, "grad_norm": 1.817186713218689, "learning_rate": 1.2026450871734797e-05, "loss": 0.6109, "step": 3803 }, { "epoch": 0.26779303062302007, "grad_norm": 1.991259217262268, "learning_rate": 1.202533988856104e-05, "loss": 0.6851, "step": 3804 }, { "epoch": 0.26786342837029214, "grad_norm": 1.8222591876983643, "learning_rate": 1.202422864411531e-05, "loss": 0.7213, "step": 3805 }, { "epoch": 0.2679338261175642, "grad_norm": 1.7424530982971191, "learning_rate": 1.202311713845538e-05, "loss": 0.6896, "step": 3806 }, { "epoch": 0.2680042238648363, "grad_norm": 1.8607991933822632, "learning_rate": 1.2022005371639044e-05, "loss": 0.7211, "step": 3807 }, { "epoch": 0.26807462161210843, "grad_norm": 1.756579041481018, "learning_rate": 1.2020893343724103e-05, "loss": 0.7301, "step": 3808 }, { "epoch": 0.2681450193593805, "grad_norm": 1.8377853631973267, "learning_rate": 1.2019781054768365e-05, "loss": 0.6916, "step": 3809 }, { "epoch": 0.2682154171066526, "grad_norm": 1.791008472442627, "learning_rate": 1.2018668504829666e-05, "loss": 0.7449, "step": 3810 }, { "epoch": 0.26828581485392466, "grad_norm": 2.210561990737915, "learning_rate": 1.2017555693965845e-05, "loss": 0.7573, "step": 3811 }, { "epoch": 0.26835621260119674, "grad_norm": 2.3197529315948486, "learning_rate": 1.201644262223476e-05, "loss": 0.7861, "step": 3812 }, { "epoch": 0.2684266103484689, "grad_norm": 2.1047229766845703, "learning_rate": 1.2015329289694281e-05, "loss": 0.7689, "step": 3813 }, { "epoch": 0.26849700809574095, "grad_norm": 1.9818062782287598, "learning_rate": 1.2014215696402288e-05, "loss": 0.7719, "step": 3814 }, { "epoch": 0.26856740584301303, "grad_norm": 1.7046561241149902, "learning_rate": 1.2013101842416682e-05, "loss": 0.6785, "step": 3815 }, { "epoch": 0.2686378035902851, "grad_norm": 2.1183481216430664, "learning_rate": 1.201198772779537e-05, "loss": 0.7991, "step": 3816 }, { "epoch": 0.2687082013375572, "grad_norm": 1.879280686378479, "learning_rate": 1.2010873352596277e-05, "loss": 0.6323, "step": 3817 }, { "epoch": 0.26877859908482926, "grad_norm": 1.827492117881775, "learning_rate": 1.200975871687734e-05, "loss": 0.8392, "step": 3818 }, { "epoch": 0.2688489968321014, "grad_norm": 1.5740671157836914, "learning_rate": 1.200864382069651e-05, "loss": 0.5986, "step": 3819 }, { "epoch": 0.26891939457937347, "grad_norm": 2.233166456222534, "learning_rate": 1.2007528664111752e-05, "loss": 0.8235, "step": 3820 }, { "epoch": 0.26898979232664555, "grad_norm": 1.7757976055145264, "learning_rate": 1.2006413247181043e-05, "loss": 0.708, "step": 3821 }, { "epoch": 0.2690601900739176, "grad_norm": 2.003347873687744, "learning_rate": 1.2005297569962375e-05, "loss": 0.778, "step": 3822 }, { "epoch": 0.2691305878211897, "grad_norm": 2.4470174312591553, "learning_rate": 1.2004181632513756e-05, "loss": 0.8365, "step": 3823 }, { "epoch": 0.26920098556846184, "grad_norm": 2.1624767780303955, "learning_rate": 1.20030654348932e-05, "loss": 0.7179, "step": 3824 }, { "epoch": 0.2692713833157339, "grad_norm": 2.132476806640625, "learning_rate": 1.200194897715874e-05, "loss": 0.763, "step": 3825 }, { "epoch": 0.269341781063006, "grad_norm": 1.8587969541549683, "learning_rate": 1.2000832259368425e-05, "loss": 0.5459, "step": 3826 }, { "epoch": 0.26941217881027807, "grad_norm": 1.8711103200912476, "learning_rate": 1.199971528158031e-05, "loss": 0.7941, "step": 3827 }, { "epoch": 0.26948257655755015, "grad_norm": 1.7823796272277832, "learning_rate": 1.1998598043852471e-05, "loss": 0.6841, "step": 3828 }, { "epoch": 0.2695529743048222, "grad_norm": 2.273538827896118, "learning_rate": 1.1997480546242991e-05, "loss": 0.6922, "step": 3829 }, { "epoch": 0.26962337205209436, "grad_norm": 1.6793993711471558, "learning_rate": 1.1996362788809971e-05, "loss": 0.691, "step": 3830 }, { "epoch": 0.26969376979936643, "grad_norm": 1.7589393854141235, "learning_rate": 1.1995244771611526e-05, "loss": 0.7204, "step": 3831 }, { "epoch": 0.2697641675466385, "grad_norm": 2.303332567214966, "learning_rate": 1.1994126494705783e-05, "loss": 0.8503, "step": 3832 }, { "epoch": 0.2698345652939106, "grad_norm": 2.0999155044555664, "learning_rate": 1.1993007958150877e-05, "loss": 0.8036, "step": 3833 }, { "epoch": 0.26990496304118267, "grad_norm": 1.9529392719268799, "learning_rate": 1.1991889162004968e-05, "loss": 0.7182, "step": 3834 }, { "epoch": 0.26997536078845474, "grad_norm": 1.9266053438186646, "learning_rate": 1.199077010632622e-05, "loss": 0.7208, "step": 3835 }, { "epoch": 0.2700457585357269, "grad_norm": 1.805142879486084, "learning_rate": 1.1989650791172815e-05, "loss": 0.7335, "step": 3836 }, { "epoch": 0.27011615628299895, "grad_norm": 1.8431488275527954, "learning_rate": 1.1988531216602944e-05, "loss": 0.7685, "step": 3837 }, { "epoch": 0.27018655403027103, "grad_norm": 1.745913028717041, "learning_rate": 1.1987411382674818e-05, "loss": 0.799, "step": 3838 }, { "epoch": 0.2702569517775431, "grad_norm": 1.8130571842193604, "learning_rate": 1.1986291289446657e-05, "loss": 0.669, "step": 3839 }, { "epoch": 0.2703273495248152, "grad_norm": 1.9802912473678589, "learning_rate": 1.1985170936976697e-05, "loss": 0.6569, "step": 3840 }, { "epoch": 0.2703977472720873, "grad_norm": 2.0341944694519043, "learning_rate": 1.1984050325323186e-05, "loss": 0.7961, "step": 3841 }, { "epoch": 0.2704681450193594, "grad_norm": 1.891936182975769, "learning_rate": 1.1982929454544384e-05, "loss": 0.6646, "step": 3842 }, { "epoch": 0.2705385427666315, "grad_norm": 1.5602281093597412, "learning_rate": 1.1981808324698565e-05, "loss": 0.7045, "step": 3843 }, { "epoch": 0.27060894051390355, "grad_norm": 1.804823875427246, "learning_rate": 1.1980686935844023e-05, "loss": 0.7116, "step": 3844 }, { "epoch": 0.27067933826117563, "grad_norm": 1.9481220245361328, "learning_rate": 1.1979565288039053e-05, "loss": 0.7615, "step": 3845 }, { "epoch": 0.2707497360084477, "grad_norm": 2.0233476161956787, "learning_rate": 1.1978443381341976e-05, "loss": 0.8537, "step": 3846 }, { "epoch": 0.27082013375571984, "grad_norm": 1.6426658630371094, "learning_rate": 1.1977321215811119e-05, "loss": 0.666, "step": 3847 }, { "epoch": 0.2708905315029919, "grad_norm": 1.8080860376358032, "learning_rate": 1.1976198791504823e-05, "loss": 0.7493, "step": 3848 }, { "epoch": 0.270960929250264, "grad_norm": 1.876859426498413, "learning_rate": 1.1975076108481445e-05, "loss": 0.6739, "step": 3849 }, { "epoch": 0.27103132699753607, "grad_norm": 1.896008014678955, "learning_rate": 1.1973953166799357e-05, "loss": 0.7514, "step": 3850 }, { "epoch": 0.27110172474480815, "grad_norm": 1.8546111583709717, "learning_rate": 1.1972829966516937e-05, "loss": 0.8016, "step": 3851 }, { "epoch": 0.2711721224920803, "grad_norm": 1.9002022743225098, "learning_rate": 1.1971706507692584e-05, "loss": 0.6873, "step": 3852 }, { "epoch": 0.27124252023935236, "grad_norm": 2.0734760761260986, "learning_rate": 1.1970582790384704e-05, "loss": 0.7236, "step": 3853 }, { "epoch": 0.27131291798662444, "grad_norm": 1.868902325630188, "learning_rate": 1.1969458814651728e-05, "loss": 0.7906, "step": 3854 }, { "epoch": 0.2713833157338965, "grad_norm": 1.7149838209152222, "learning_rate": 1.1968334580552086e-05, "loss": 0.7471, "step": 3855 }, { "epoch": 0.2714537134811686, "grad_norm": 2.151376962661743, "learning_rate": 1.1967210088144227e-05, "loss": 0.6836, "step": 3856 }, { "epoch": 0.27152411122844067, "grad_norm": 1.7553517818450928, "learning_rate": 1.196608533748662e-05, "loss": 0.7428, "step": 3857 }, { "epoch": 0.2715945089757128, "grad_norm": 1.9474021196365356, "learning_rate": 1.1964960328637737e-05, "loss": 0.6725, "step": 3858 }, { "epoch": 0.2716649067229849, "grad_norm": 1.8811299800872803, "learning_rate": 1.196383506165607e-05, "loss": 0.7005, "step": 3859 }, { "epoch": 0.27173530447025696, "grad_norm": 1.9418044090270996, "learning_rate": 1.1962709536600123e-05, "loss": 0.8386, "step": 3860 }, { "epoch": 0.27180570221752903, "grad_norm": 2.144973039627075, "learning_rate": 1.1961583753528412e-05, "loss": 0.7349, "step": 3861 }, { "epoch": 0.2718760999648011, "grad_norm": 1.6176515817642212, "learning_rate": 1.196045771249947e-05, "loss": 0.7639, "step": 3862 }, { "epoch": 0.2719464977120732, "grad_norm": 2.049866199493408, "learning_rate": 1.1959331413571838e-05, "loss": 0.7087, "step": 3863 }, { "epoch": 0.2720168954593453, "grad_norm": 2.0620741844177246, "learning_rate": 1.1958204856804075e-05, "loss": 0.7391, "step": 3864 }, { "epoch": 0.2720872932066174, "grad_norm": 1.6856505870819092, "learning_rate": 1.1957078042254751e-05, "loss": 0.7282, "step": 3865 }, { "epoch": 0.2721576909538895, "grad_norm": 1.722184181213379, "learning_rate": 1.195595096998245e-05, "loss": 0.6876, "step": 3866 }, { "epoch": 0.27222808870116155, "grad_norm": 1.6155259609222412, "learning_rate": 1.1954823640045768e-05, "loss": 0.739, "step": 3867 }, { "epoch": 0.27229848644843363, "grad_norm": 1.7632029056549072, "learning_rate": 1.1953696052503321e-05, "loss": 0.828, "step": 3868 }, { "epoch": 0.27236888419570576, "grad_norm": 1.7166856527328491, "learning_rate": 1.1952568207413727e-05, "loss": 0.7498, "step": 3869 }, { "epoch": 0.27243928194297784, "grad_norm": 1.7267314195632935, "learning_rate": 1.195144010483563e-05, "loss": 0.8155, "step": 3870 }, { "epoch": 0.2725096796902499, "grad_norm": 1.8814709186553955, "learning_rate": 1.1950311744827673e-05, "loss": 0.6446, "step": 3871 }, { "epoch": 0.272580077437522, "grad_norm": 1.939123272895813, "learning_rate": 1.1949183127448527e-05, "loss": 0.8221, "step": 3872 }, { "epoch": 0.27265047518479407, "grad_norm": 1.4833722114562988, "learning_rate": 1.1948054252756868e-05, "loss": 0.7908, "step": 3873 }, { "epoch": 0.27272087293206615, "grad_norm": 2.2403600215911865, "learning_rate": 1.1946925120811389e-05, "loss": 0.7744, "step": 3874 }, { "epoch": 0.2727912706793383, "grad_norm": 1.887914776802063, "learning_rate": 1.1945795731670792e-05, "loss": 0.7324, "step": 3875 }, { "epoch": 0.27286166842661036, "grad_norm": 1.7462515830993652, "learning_rate": 1.1944666085393794e-05, "loss": 0.5705, "step": 3876 }, { "epoch": 0.27293206617388244, "grad_norm": 2.5847558975219727, "learning_rate": 1.194353618203913e-05, "loss": 0.7981, "step": 3877 }, { "epoch": 0.2730024639211545, "grad_norm": 1.5702255964279175, "learning_rate": 1.194240602166554e-05, "loss": 0.7315, "step": 3878 }, { "epoch": 0.2730728616684266, "grad_norm": 1.5386414527893066, "learning_rate": 1.1941275604331787e-05, "loss": 0.7439, "step": 3879 }, { "epoch": 0.2731432594156987, "grad_norm": 1.9823660850524902, "learning_rate": 1.194014493009664e-05, "loss": 0.8286, "step": 3880 }, { "epoch": 0.2732136571629708, "grad_norm": 1.7457764148712158, "learning_rate": 1.1939013999018885e-05, "loss": 0.6361, "step": 3881 }, { "epoch": 0.2732840549102429, "grad_norm": 2.1409554481506348, "learning_rate": 1.1937882811157316e-05, "loss": 0.747, "step": 3882 }, { "epoch": 0.27335445265751496, "grad_norm": 1.7860232591629028, "learning_rate": 1.1936751366570752e-05, "loss": 0.7935, "step": 3883 }, { "epoch": 0.27342485040478703, "grad_norm": 1.9025826454162598, "learning_rate": 1.193561966531801e-05, "loss": 0.6753, "step": 3884 }, { "epoch": 0.2734952481520591, "grad_norm": 1.7915054559707642, "learning_rate": 1.1934487707457932e-05, "loss": 0.6366, "step": 3885 }, { "epoch": 0.27356564589933124, "grad_norm": 2.0761730670928955, "learning_rate": 1.1933355493049371e-05, "loss": 0.7288, "step": 3886 }, { "epoch": 0.2736360436466033, "grad_norm": 2.248682975769043, "learning_rate": 1.1932223022151191e-05, "loss": 0.7893, "step": 3887 }, { "epoch": 0.2737064413938754, "grad_norm": 2.1294896602630615, "learning_rate": 1.1931090294822265e-05, "loss": 0.7442, "step": 3888 }, { "epoch": 0.2737768391411475, "grad_norm": 1.6048939228057861, "learning_rate": 1.1929957311121492e-05, "loss": 0.7252, "step": 3889 }, { "epoch": 0.27384723688841955, "grad_norm": 2.3513660430908203, "learning_rate": 1.1928824071107773e-05, "loss": 0.7003, "step": 3890 }, { "epoch": 0.27391763463569163, "grad_norm": 1.6869572401046753, "learning_rate": 1.1927690574840025e-05, "loss": 0.722, "step": 3891 }, { "epoch": 0.27398803238296376, "grad_norm": 1.9833486080169678, "learning_rate": 1.1926556822377184e-05, "loss": 0.6745, "step": 3892 }, { "epoch": 0.27405843013023584, "grad_norm": 2.383958339691162, "learning_rate": 1.192542281377819e-05, "loss": 0.6148, "step": 3893 }, { "epoch": 0.2741288278775079, "grad_norm": 1.9743777513504028, "learning_rate": 1.1924288549102002e-05, "loss": 0.7732, "step": 3894 }, { "epoch": 0.27419922562478, "grad_norm": 1.5348643064498901, "learning_rate": 1.1923154028407595e-05, "loss": 0.8069, "step": 3895 }, { "epoch": 0.2742696233720521, "grad_norm": 1.7254599332809448, "learning_rate": 1.192201925175395e-05, "loss": 0.8013, "step": 3896 }, { "epoch": 0.2743400211193242, "grad_norm": 1.5363547801971436, "learning_rate": 1.1920884219200068e-05, "loss": 0.7633, "step": 3897 }, { "epoch": 0.2744104188665963, "grad_norm": 2.465437412261963, "learning_rate": 1.1919748930804958e-05, "loss": 0.7416, "step": 3898 }, { "epoch": 0.27448081661386836, "grad_norm": 1.8391296863555908, "learning_rate": 1.1918613386627647e-05, "loss": 0.6799, "step": 3899 }, { "epoch": 0.27455121436114044, "grad_norm": 1.820837140083313, "learning_rate": 1.1917477586727169e-05, "loss": 0.7506, "step": 3900 }, { "epoch": 0.2746216121084125, "grad_norm": 1.8815189599990845, "learning_rate": 1.191634153116258e-05, "loss": 0.7487, "step": 3901 }, { "epoch": 0.2746920098556846, "grad_norm": 1.8758209943771362, "learning_rate": 1.191520521999294e-05, "loss": 0.6303, "step": 3902 }, { "epoch": 0.2747624076029567, "grad_norm": 2.1485259532928467, "learning_rate": 1.1914068653277331e-05, "loss": 0.7304, "step": 3903 }, { "epoch": 0.2748328053502288, "grad_norm": 1.8579866886138916, "learning_rate": 1.191293183107484e-05, "loss": 0.7951, "step": 3904 }, { "epoch": 0.2749032030975009, "grad_norm": 1.7917592525482178, "learning_rate": 1.1911794753444575e-05, "loss": 0.7142, "step": 3905 }, { "epoch": 0.27497360084477296, "grad_norm": 1.6829959154129028, "learning_rate": 1.1910657420445652e-05, "loss": 0.6523, "step": 3906 }, { "epoch": 0.27504399859204504, "grad_norm": 1.6976457834243774, "learning_rate": 1.1909519832137204e-05, "loss": 0.6107, "step": 3907 }, { "epoch": 0.27511439633931717, "grad_norm": 2.0412561893463135, "learning_rate": 1.190838198857837e-05, "loss": 0.7182, "step": 3908 }, { "epoch": 0.27518479408658925, "grad_norm": 2.0106358528137207, "learning_rate": 1.1907243889828316e-05, "loss": 0.8167, "step": 3909 }, { "epoch": 0.2752551918338613, "grad_norm": 1.8946852684020996, "learning_rate": 1.1906105535946206e-05, "loss": 0.8229, "step": 3910 }, { "epoch": 0.2753255895811334, "grad_norm": 2.0233354568481445, "learning_rate": 1.1904966926991225e-05, "loss": 0.7311, "step": 3911 }, { "epoch": 0.2753959873284055, "grad_norm": 1.7656471729278564, "learning_rate": 1.1903828063022571e-05, "loss": 0.7911, "step": 3912 }, { "epoch": 0.27546638507567756, "grad_norm": 2.113478899002075, "learning_rate": 1.1902688944099456e-05, "loss": 0.8127, "step": 3913 }, { "epoch": 0.2755367828229497, "grad_norm": 2.0491926670074463, "learning_rate": 1.1901549570281102e-05, "loss": 0.7337, "step": 3914 }, { "epoch": 0.27560718057022177, "grad_norm": 2.033907890319824, "learning_rate": 1.1900409941626748e-05, "loss": 0.7848, "step": 3915 }, { "epoch": 0.27567757831749384, "grad_norm": 1.6825600862503052, "learning_rate": 1.1899270058195642e-05, "loss": 0.7357, "step": 3916 }, { "epoch": 0.2757479760647659, "grad_norm": 1.8668832778930664, "learning_rate": 1.1898129920047048e-05, "loss": 0.6905, "step": 3917 }, { "epoch": 0.275818373812038, "grad_norm": 1.966882348060608, "learning_rate": 1.1896989527240245e-05, "loss": 0.7839, "step": 3918 }, { "epoch": 0.2758887715593101, "grad_norm": 1.8359715938568115, "learning_rate": 1.1895848879834519e-05, "loss": 0.8352, "step": 3919 }, { "epoch": 0.2759591693065822, "grad_norm": 1.5507322549819946, "learning_rate": 1.1894707977889177e-05, "loss": 0.7801, "step": 3920 }, { "epoch": 0.2760295670538543, "grad_norm": 1.7686100006103516, "learning_rate": 1.1893566821463536e-05, "loss": 0.8456, "step": 3921 }, { "epoch": 0.27609996480112636, "grad_norm": 1.5609196424484253, "learning_rate": 1.189242541061692e-05, "loss": 0.6357, "step": 3922 }, { "epoch": 0.27617036254839844, "grad_norm": 2.1082611083984375, "learning_rate": 1.189128374540868e-05, "loss": 0.8138, "step": 3923 }, { "epoch": 0.2762407602956705, "grad_norm": 1.9912757873535156, "learning_rate": 1.1890141825898166e-05, "loss": 0.7341, "step": 3924 }, { "epoch": 0.27631115804294265, "grad_norm": 1.5862867832183838, "learning_rate": 1.1888999652144752e-05, "loss": 0.6835, "step": 3925 }, { "epoch": 0.27638155579021473, "grad_norm": 1.928122878074646, "learning_rate": 1.1887857224207818e-05, "loss": 0.7613, "step": 3926 }, { "epoch": 0.2764519535374868, "grad_norm": 1.834381341934204, "learning_rate": 1.1886714542146759e-05, "loss": 0.7543, "step": 3927 }, { "epoch": 0.2765223512847589, "grad_norm": 1.8013280630111694, "learning_rate": 1.1885571606020985e-05, "loss": 0.8341, "step": 3928 }, { "epoch": 0.27659274903203096, "grad_norm": 1.7989022731781006, "learning_rate": 1.188442841588992e-05, "loss": 0.8286, "step": 3929 }, { "epoch": 0.27666314677930304, "grad_norm": 1.6944236755371094, "learning_rate": 1.1883284971812996e-05, "loss": 0.6851, "step": 3930 }, { "epoch": 0.27673354452657517, "grad_norm": 1.896464467048645, "learning_rate": 1.1882141273849665e-05, "loss": 0.5972, "step": 3931 }, { "epoch": 0.27680394227384725, "grad_norm": 2.08565616607666, "learning_rate": 1.1880997322059388e-05, "loss": 0.8634, "step": 3932 }, { "epoch": 0.2768743400211193, "grad_norm": 1.905483365058899, "learning_rate": 1.187985311650164e-05, "loss": 0.7275, "step": 3933 }, { "epoch": 0.2769447377683914, "grad_norm": 1.4740272760391235, "learning_rate": 1.1878708657235909e-05, "loss": 0.7651, "step": 3934 }, { "epoch": 0.2770151355156635, "grad_norm": 1.9124128818511963, "learning_rate": 1.1877563944321695e-05, "loss": 0.7324, "step": 3935 }, { "epoch": 0.2770855332629356, "grad_norm": 2.0551040172576904, "learning_rate": 1.1876418977818515e-05, "loss": 0.6654, "step": 3936 }, { "epoch": 0.2771559310102077, "grad_norm": 1.9841262102127075, "learning_rate": 1.1875273757785896e-05, "loss": 0.7444, "step": 3937 }, { "epoch": 0.27722632875747977, "grad_norm": 1.660323143005371, "learning_rate": 1.1874128284283378e-05, "loss": 0.6027, "step": 3938 }, { "epoch": 0.27729672650475184, "grad_norm": 1.8307011127471924, "learning_rate": 1.1872982557370519e-05, "loss": 0.6553, "step": 3939 }, { "epoch": 0.2773671242520239, "grad_norm": 1.6628056764602661, "learning_rate": 1.1871836577106882e-05, "loss": 0.6907, "step": 3940 }, { "epoch": 0.277437521999296, "grad_norm": 1.8518277406692505, "learning_rate": 1.187069034355205e-05, "loss": 0.7199, "step": 3941 }, { "epoch": 0.27750791974656813, "grad_norm": 1.96440589427948, "learning_rate": 1.1869543856765615e-05, "loss": 0.8244, "step": 3942 }, { "epoch": 0.2775783174938402, "grad_norm": 1.772930383682251, "learning_rate": 1.1868397116807183e-05, "loss": 0.7313, "step": 3943 }, { "epoch": 0.2776487152411123, "grad_norm": 1.854162335395813, "learning_rate": 1.1867250123736378e-05, "loss": 0.6834, "step": 3944 }, { "epoch": 0.27771911298838436, "grad_norm": 1.9780874252319336, "learning_rate": 1.186610287761283e-05, "loss": 0.7239, "step": 3945 }, { "epoch": 0.27778951073565644, "grad_norm": 2.0153515338897705, "learning_rate": 1.1864955378496187e-05, "loss": 0.6862, "step": 3946 }, { "epoch": 0.2778599084829285, "grad_norm": 1.8308848142623901, "learning_rate": 1.1863807626446109e-05, "loss": 0.7725, "step": 3947 }, { "epoch": 0.27793030623020065, "grad_norm": 1.8398529291152954, "learning_rate": 1.1862659621522265e-05, "loss": 0.7442, "step": 3948 }, { "epoch": 0.27800070397747273, "grad_norm": 1.777825117111206, "learning_rate": 1.1861511363784345e-05, "loss": 0.6268, "step": 3949 }, { "epoch": 0.2780711017247448, "grad_norm": 1.6637208461761475, "learning_rate": 1.1860362853292047e-05, "loss": 0.6235, "step": 3950 }, { "epoch": 0.2781414994720169, "grad_norm": 1.8231853246688843, "learning_rate": 1.1859214090105083e-05, "loss": 0.6539, "step": 3951 }, { "epoch": 0.27821189721928896, "grad_norm": 1.9549661874771118, "learning_rate": 1.1858065074283175e-05, "loss": 0.7338, "step": 3952 }, { "epoch": 0.2782822949665611, "grad_norm": 1.7487702369689941, "learning_rate": 1.1856915805886065e-05, "loss": 0.7239, "step": 3953 }, { "epoch": 0.27835269271383317, "grad_norm": 1.7718636989593506, "learning_rate": 1.1855766284973504e-05, "loss": 0.6977, "step": 3954 }, { "epoch": 0.27842309046110525, "grad_norm": 2.595485210418701, "learning_rate": 1.1854616511605256e-05, "loss": 0.7689, "step": 3955 }, { "epoch": 0.2784934882083773, "grad_norm": 2.023730993270874, "learning_rate": 1.1853466485841098e-05, "loss": 0.8546, "step": 3956 }, { "epoch": 0.2785638859556494, "grad_norm": 2.6776604652404785, "learning_rate": 1.1852316207740824e-05, "loss": 0.7036, "step": 3957 }, { "epoch": 0.2786342837029215, "grad_norm": 1.9854227304458618, "learning_rate": 1.1851165677364235e-05, "loss": 0.7395, "step": 3958 }, { "epoch": 0.2787046814501936, "grad_norm": 1.7253499031066895, "learning_rate": 1.1850014894771148e-05, "loss": 0.6224, "step": 3959 }, { "epoch": 0.2787750791974657, "grad_norm": 1.868184208869934, "learning_rate": 1.1848863860021396e-05, "loss": 0.8431, "step": 3960 }, { "epoch": 0.27884547694473777, "grad_norm": 1.6449055671691895, "learning_rate": 1.184771257317482e-05, "loss": 0.664, "step": 3961 }, { "epoch": 0.27891587469200985, "grad_norm": 1.851151466369629, "learning_rate": 1.1846561034291274e-05, "loss": 0.7107, "step": 3962 }, { "epoch": 0.2789862724392819, "grad_norm": 1.9545187950134277, "learning_rate": 1.1845409243430635e-05, "loss": 0.697, "step": 3963 }, { "epoch": 0.27905667018655406, "grad_norm": 1.8516836166381836, "learning_rate": 1.184425720065278e-05, "loss": 0.6811, "step": 3964 }, { "epoch": 0.27912706793382613, "grad_norm": 2.136770725250244, "learning_rate": 1.1843104906017604e-05, "loss": 0.7586, "step": 3965 }, { "epoch": 0.2791974656810982, "grad_norm": 1.725250005722046, "learning_rate": 1.1841952359585019e-05, "loss": 0.7379, "step": 3966 }, { "epoch": 0.2792678634283703, "grad_norm": 2.1432723999023438, "learning_rate": 1.1840799561414946e-05, "loss": 0.6607, "step": 3967 }, { "epoch": 0.27933826117564237, "grad_norm": 1.9188305139541626, "learning_rate": 1.1839646511567319e-05, "loss": 0.735, "step": 3968 }, { "epoch": 0.27940865892291444, "grad_norm": 1.7334930896759033, "learning_rate": 1.1838493210102087e-05, "loss": 0.6616, "step": 3969 }, { "epoch": 0.2794790566701866, "grad_norm": 1.7690743207931519, "learning_rate": 1.183733965707921e-05, "loss": 0.6248, "step": 3970 }, { "epoch": 0.27954945441745865, "grad_norm": 1.7078536748886108, "learning_rate": 1.1836185852558664e-05, "loss": 0.6536, "step": 3971 }, { "epoch": 0.27961985216473073, "grad_norm": 1.5742218494415283, "learning_rate": 1.1835031796600437e-05, "loss": 0.7006, "step": 3972 }, { "epoch": 0.2796902499120028, "grad_norm": 2.0936522483825684, "learning_rate": 1.1833877489264527e-05, "loss": 0.7077, "step": 3973 }, { "epoch": 0.2797606476592749, "grad_norm": 2.11342191696167, "learning_rate": 1.1832722930610948e-05, "loss": 0.6215, "step": 3974 }, { "epoch": 0.27983104540654696, "grad_norm": 2.3073060512542725, "learning_rate": 1.1831568120699726e-05, "loss": 0.7858, "step": 3975 }, { "epoch": 0.2799014431538191, "grad_norm": 1.7394955158233643, "learning_rate": 1.1830413059590906e-05, "loss": 0.78, "step": 3976 }, { "epoch": 0.2799718409010912, "grad_norm": 1.9047729969024658, "learning_rate": 1.1829257747344533e-05, "loss": 0.7045, "step": 3977 }, { "epoch": 0.28004223864836325, "grad_norm": 4.224178791046143, "learning_rate": 1.1828102184020676e-05, "loss": 0.7154, "step": 3978 }, { "epoch": 0.28011263639563533, "grad_norm": 2.0666863918304443, "learning_rate": 1.1826946369679415e-05, "loss": 0.7744, "step": 3979 }, { "epoch": 0.2801830341429074, "grad_norm": 2.211413621902466, "learning_rate": 1.182579030438084e-05, "loss": 0.6339, "step": 3980 }, { "epoch": 0.28025343189017954, "grad_norm": 1.7837681770324707, "learning_rate": 1.1824633988185056e-05, "loss": 0.7475, "step": 3981 }, { "epoch": 0.2803238296374516, "grad_norm": 1.8710612058639526, "learning_rate": 1.1823477421152184e-05, "loss": 0.7091, "step": 3982 }, { "epoch": 0.2803942273847237, "grad_norm": 2.2514004707336426, "learning_rate": 1.182232060334235e-05, "loss": 0.6202, "step": 3983 }, { "epoch": 0.28046462513199577, "grad_norm": 1.9524353742599487, "learning_rate": 1.1821163534815702e-05, "loss": 0.6729, "step": 3984 }, { "epoch": 0.28053502287926785, "grad_norm": 2.047382354736328, "learning_rate": 1.1820006215632394e-05, "loss": 0.7794, "step": 3985 }, { "epoch": 0.2806054206265399, "grad_norm": 1.9501782655715942, "learning_rate": 1.1818848645852598e-05, "loss": 0.8123, "step": 3986 }, { "epoch": 0.28067581837381206, "grad_norm": 1.5328397750854492, "learning_rate": 1.1817690825536498e-05, "loss": 0.7816, "step": 3987 }, { "epoch": 0.28074621612108414, "grad_norm": 2.5197253227233887, "learning_rate": 1.1816532754744288e-05, "loss": 0.7367, "step": 3988 }, { "epoch": 0.2808166138683562, "grad_norm": 1.7260819673538208, "learning_rate": 1.1815374433536178e-05, "loss": 0.6694, "step": 3989 }, { "epoch": 0.2808870116156283, "grad_norm": 2.0017902851104736, "learning_rate": 1.181421586197239e-05, "loss": 0.7316, "step": 3990 }, { "epoch": 0.28095740936290037, "grad_norm": 1.867958664894104, "learning_rate": 1.1813057040113159e-05, "loss": 0.7448, "step": 3991 }, { "epoch": 0.2810278071101725, "grad_norm": 1.914707899093628, "learning_rate": 1.1811897968018732e-05, "loss": 0.7334, "step": 3992 }, { "epoch": 0.2810982048574446, "grad_norm": 1.5701572895050049, "learning_rate": 1.1810738645749375e-05, "loss": 0.6875, "step": 3993 }, { "epoch": 0.28116860260471666, "grad_norm": 1.6002938747406006, "learning_rate": 1.1809579073365358e-05, "loss": 0.6565, "step": 3994 }, { "epoch": 0.28123900035198873, "grad_norm": 3.0042548179626465, "learning_rate": 1.1808419250926969e-05, "loss": 0.7608, "step": 3995 }, { "epoch": 0.2813093980992608, "grad_norm": 2.0234296321868896, "learning_rate": 1.1807259178494507e-05, "loss": 0.7731, "step": 3996 }, { "epoch": 0.2813797958465329, "grad_norm": 1.856994390487671, "learning_rate": 1.1806098856128287e-05, "loss": 0.6714, "step": 3997 }, { "epoch": 0.281450193593805, "grad_norm": 1.8975565433502197, "learning_rate": 1.1804938283888637e-05, "loss": 0.7376, "step": 3998 }, { "epoch": 0.2815205913410771, "grad_norm": 1.9372038841247559, "learning_rate": 1.1803777461835893e-05, "loss": 0.7841, "step": 3999 }, { "epoch": 0.2815909890883492, "grad_norm": 1.8341703414916992, "learning_rate": 1.1802616390030407e-05, "loss": 0.597, "step": 4000 }, { "epoch": 0.28166138683562125, "grad_norm": 1.7756075859069824, "learning_rate": 1.1801455068532545e-05, "loss": 0.6332, "step": 4001 }, { "epoch": 0.28173178458289333, "grad_norm": 1.5782862901687622, "learning_rate": 1.1800293497402687e-05, "loss": 0.6925, "step": 4002 }, { "epoch": 0.2818021823301654, "grad_norm": 1.4802216291427612, "learning_rate": 1.1799131676701222e-05, "loss": 0.8862, "step": 4003 }, { "epoch": 0.28187258007743754, "grad_norm": 1.8656500577926636, "learning_rate": 1.1797969606488555e-05, "loss": 0.836, "step": 4004 }, { "epoch": 0.2819429778247096, "grad_norm": 2.19840669631958, "learning_rate": 1.1796807286825103e-05, "loss": 0.7531, "step": 4005 }, { "epoch": 0.2820133755719817, "grad_norm": 1.7757377624511719, "learning_rate": 1.1795644717771296e-05, "loss": 0.7293, "step": 4006 }, { "epoch": 0.28208377331925377, "grad_norm": 1.485701322555542, "learning_rate": 1.1794481899387578e-05, "loss": 0.7077, "step": 4007 }, { "epoch": 0.28215417106652585, "grad_norm": 2.251635789871216, "learning_rate": 1.1793318831734401e-05, "loss": 0.7226, "step": 4008 }, { "epoch": 0.282224568813798, "grad_norm": 1.7505569458007812, "learning_rate": 1.179215551487224e-05, "loss": 0.7163, "step": 4009 }, { "epoch": 0.28229496656107006, "grad_norm": 2.0857231616973877, "learning_rate": 1.1790991948861573e-05, "loss": 0.8856, "step": 4010 }, { "epoch": 0.28236536430834214, "grad_norm": 1.8925623893737793, "learning_rate": 1.1789828133762894e-05, "loss": 0.749, "step": 4011 }, { "epoch": 0.2824357620556142, "grad_norm": 1.9142769575119019, "learning_rate": 1.1788664069636712e-05, "loss": 0.7937, "step": 4012 }, { "epoch": 0.2825061598028863, "grad_norm": 1.838640570640564, "learning_rate": 1.1787499756543551e-05, "loss": 0.7155, "step": 4013 }, { "epoch": 0.28257655755015837, "grad_norm": 2.449157476425171, "learning_rate": 1.178633519454394e-05, "loss": 0.7899, "step": 4014 }, { "epoch": 0.2826469552974305, "grad_norm": 1.8678617477416992, "learning_rate": 1.1785170383698427e-05, "loss": 0.774, "step": 4015 }, { "epoch": 0.2827173530447026, "grad_norm": 1.8825284242630005, "learning_rate": 1.1784005324067573e-05, "loss": 0.666, "step": 4016 }, { "epoch": 0.28278775079197466, "grad_norm": 1.6521753072738647, "learning_rate": 1.178284001571195e-05, "loss": 0.6359, "step": 4017 }, { "epoch": 0.28285814853924673, "grad_norm": 1.8723312616348267, "learning_rate": 1.1781674458692142e-05, "loss": 0.7366, "step": 4018 }, { "epoch": 0.2829285462865188, "grad_norm": 1.858221411705017, "learning_rate": 1.178050865306875e-05, "loss": 0.7957, "step": 4019 }, { "epoch": 0.28299894403379094, "grad_norm": 1.8949215412139893, "learning_rate": 1.1779342598902381e-05, "loss": 0.68, "step": 4020 }, { "epoch": 0.283069341781063, "grad_norm": 1.9945570230484009, "learning_rate": 1.1778176296253664e-05, "loss": 0.7071, "step": 4021 }, { "epoch": 0.2831397395283351, "grad_norm": 1.8430193662643433, "learning_rate": 1.1777009745183233e-05, "loss": 0.7698, "step": 4022 }, { "epoch": 0.2832101372756072, "grad_norm": 2.134927749633789, "learning_rate": 1.1775842945751739e-05, "loss": 0.722, "step": 4023 }, { "epoch": 0.28328053502287925, "grad_norm": 1.9295897483825684, "learning_rate": 1.1774675898019844e-05, "loss": 0.6868, "step": 4024 }, { "epoch": 0.28335093277015133, "grad_norm": 2.17865252494812, "learning_rate": 1.1773508602048227e-05, "loss": 0.769, "step": 4025 }, { "epoch": 0.28342133051742346, "grad_norm": 1.9356571435928345, "learning_rate": 1.1772341057897573e-05, "loss": 0.8004, "step": 4026 }, { "epoch": 0.28349172826469554, "grad_norm": 1.8020119667053223, "learning_rate": 1.1771173265628586e-05, "loss": 0.5957, "step": 4027 }, { "epoch": 0.2835621260119676, "grad_norm": 1.735824704170227, "learning_rate": 1.1770005225301981e-05, "loss": 0.6663, "step": 4028 }, { "epoch": 0.2836325237592397, "grad_norm": 2.059884548187256, "learning_rate": 1.1768836936978481e-05, "loss": 0.7285, "step": 4029 }, { "epoch": 0.2837029215065118, "grad_norm": 2.001176118850708, "learning_rate": 1.1767668400718834e-05, "loss": 0.7449, "step": 4030 }, { "epoch": 0.2837733192537839, "grad_norm": 1.8828545808792114, "learning_rate": 1.1766499616583787e-05, "loss": 0.6438, "step": 4031 }, { "epoch": 0.283843717001056, "grad_norm": 2.0976760387420654, "learning_rate": 1.1765330584634106e-05, "loss": 0.711, "step": 4032 }, { "epoch": 0.28391411474832806, "grad_norm": 1.8128604888916016, "learning_rate": 1.1764161304930577e-05, "loss": 0.7218, "step": 4033 }, { "epoch": 0.28398451249560014, "grad_norm": 1.742648720741272, "learning_rate": 1.1762991777533985e-05, "loss": 0.655, "step": 4034 }, { "epoch": 0.2840549102428722, "grad_norm": 1.7699403762817383, "learning_rate": 1.1761822002505134e-05, "loss": 0.5665, "step": 4035 }, { "epoch": 0.2841253079901443, "grad_norm": 1.640975832939148, "learning_rate": 1.1760651979904847e-05, "loss": 0.7414, "step": 4036 }, { "epoch": 0.2841957057374164, "grad_norm": 1.8709238767623901, "learning_rate": 1.1759481709793953e-05, "loss": 0.773, "step": 4037 }, { "epoch": 0.2842661034846885, "grad_norm": 1.7185194492340088, "learning_rate": 1.1758311192233293e-05, "loss": 0.6993, "step": 4038 }, { "epoch": 0.2843365012319606, "grad_norm": 1.9718369245529175, "learning_rate": 1.1757140427283726e-05, "loss": 0.7537, "step": 4039 }, { "epoch": 0.28440689897923266, "grad_norm": 2.19683575630188, "learning_rate": 1.1755969415006118e-05, "loss": 0.7364, "step": 4040 }, { "epoch": 0.28447729672650474, "grad_norm": 1.7461477518081665, "learning_rate": 1.1754798155461353e-05, "loss": 0.6148, "step": 4041 }, { "epoch": 0.2845476944737768, "grad_norm": 1.7108646631240845, "learning_rate": 1.1753626648710327e-05, "loss": 0.6388, "step": 4042 }, { "epoch": 0.28461809222104895, "grad_norm": 1.8955973386764526, "learning_rate": 1.1752454894813946e-05, "loss": 0.738, "step": 4043 }, { "epoch": 0.284688489968321, "grad_norm": 2.4048807621002197, "learning_rate": 1.1751282893833132e-05, "loss": 0.6608, "step": 4044 }, { "epoch": 0.2847588877155931, "grad_norm": 3.091808319091797, "learning_rate": 1.1750110645828816e-05, "loss": 0.7293, "step": 4045 }, { "epoch": 0.2848292854628652, "grad_norm": 2.143312931060791, "learning_rate": 1.1748938150861944e-05, "loss": 0.7031, "step": 4046 }, { "epoch": 0.28489968321013726, "grad_norm": 1.890505075454712, "learning_rate": 1.174776540899348e-05, "loss": 0.7357, "step": 4047 }, { "epoch": 0.2849700809574094, "grad_norm": 1.9965007305145264, "learning_rate": 1.174659242028439e-05, "loss": 0.7793, "step": 4048 }, { "epoch": 0.28504047870468147, "grad_norm": 2.086289644241333, "learning_rate": 1.1745419184795662e-05, "loss": 0.7478, "step": 4049 }, { "epoch": 0.28511087645195354, "grad_norm": 2.034682273864746, "learning_rate": 1.1744245702588293e-05, "loss": 0.8296, "step": 4050 }, { "epoch": 0.2851812741992256, "grad_norm": 2.088881492614746, "learning_rate": 1.1743071973723293e-05, "loss": 0.7195, "step": 4051 }, { "epoch": 0.2852516719464977, "grad_norm": 1.8486860990524292, "learning_rate": 1.1741897998261687e-05, "loss": 0.9014, "step": 4052 }, { "epoch": 0.2853220696937698, "grad_norm": 1.4136974811553955, "learning_rate": 1.1740723776264507e-05, "loss": 0.7527, "step": 4053 }, { "epoch": 0.2853924674410419, "grad_norm": 2.1723341941833496, "learning_rate": 1.1739549307792807e-05, "loss": 0.7283, "step": 4054 }, { "epoch": 0.285462865188314, "grad_norm": 1.496518611907959, "learning_rate": 1.1738374592907644e-05, "loss": 0.6303, "step": 4055 }, { "epoch": 0.28553326293558606, "grad_norm": 2.097956418991089, "learning_rate": 1.1737199631670096e-05, "loss": 0.7472, "step": 4056 }, { "epoch": 0.28560366068285814, "grad_norm": 2.025075674057007, "learning_rate": 1.173602442414125e-05, "loss": 0.8614, "step": 4057 }, { "epoch": 0.2856740584301302, "grad_norm": 1.600594162940979, "learning_rate": 1.1734848970382203e-05, "loss": 0.6097, "step": 4058 }, { "epoch": 0.28574445617740235, "grad_norm": 2.1172478199005127, "learning_rate": 1.173367327045407e-05, "loss": 0.7707, "step": 4059 }, { "epoch": 0.28581485392467443, "grad_norm": 1.8950347900390625, "learning_rate": 1.1732497324417976e-05, "loss": 0.7105, "step": 4060 }, { "epoch": 0.2858852516719465, "grad_norm": 2.4193332195281982, "learning_rate": 1.1731321132335062e-05, "loss": 0.7467, "step": 4061 }, { "epoch": 0.2859556494192186, "grad_norm": 1.9347636699676514, "learning_rate": 1.1730144694266476e-05, "loss": 0.856, "step": 4062 }, { "epoch": 0.28602604716649066, "grad_norm": 2.563082456588745, "learning_rate": 1.1728968010273385e-05, "loss": 0.6564, "step": 4063 }, { "epoch": 0.28609644491376274, "grad_norm": 2.0575592517852783, "learning_rate": 1.1727791080416961e-05, "loss": 0.7752, "step": 4064 }, { "epoch": 0.28616684266103487, "grad_norm": 1.7505332231521606, "learning_rate": 1.17266139047584e-05, "loss": 0.6376, "step": 4065 }, { "epoch": 0.28623724040830695, "grad_norm": 1.9960609674453735, "learning_rate": 1.1725436483358902e-05, "loss": 0.7204, "step": 4066 }, { "epoch": 0.286307638155579, "grad_norm": 2.1367080211639404, "learning_rate": 1.172425881627968e-05, "loss": 0.7179, "step": 4067 }, { "epoch": 0.2863780359028511, "grad_norm": 7.751003265380859, "learning_rate": 1.1723080903581963e-05, "loss": 0.6828, "step": 4068 }, { "epoch": 0.2864484336501232, "grad_norm": 1.9674551486968994, "learning_rate": 1.1721902745326992e-05, "loss": 0.6899, "step": 4069 }, { "epoch": 0.28651883139739526, "grad_norm": 1.7925376892089844, "learning_rate": 1.1720724341576023e-05, "loss": 0.6373, "step": 4070 }, { "epoch": 0.2865892291446674, "grad_norm": 1.7596532106399536, "learning_rate": 1.1719545692390318e-05, "loss": 0.7399, "step": 4071 }, { "epoch": 0.28665962689193947, "grad_norm": 2.814255714416504, "learning_rate": 1.1718366797831161e-05, "loss": 0.7209, "step": 4072 }, { "epoch": 0.28673002463921154, "grad_norm": 1.795639157295227, "learning_rate": 1.1717187657959838e-05, "loss": 0.7598, "step": 4073 }, { "epoch": 0.2868004223864836, "grad_norm": 1.6410517692565918, "learning_rate": 1.1716008272837658e-05, "loss": 0.6982, "step": 4074 }, { "epoch": 0.2868708201337557, "grad_norm": 1.7311233282089233, "learning_rate": 1.1714828642525938e-05, "loss": 0.7243, "step": 4075 }, { "epoch": 0.28694121788102783, "grad_norm": 1.5485459566116333, "learning_rate": 1.1713648767086004e-05, "loss": 0.6689, "step": 4076 }, { "epoch": 0.2870116156282999, "grad_norm": 1.8302311897277832, "learning_rate": 1.1712468646579203e-05, "loss": 0.8418, "step": 4077 }, { "epoch": 0.287082013375572, "grad_norm": 1.9976918697357178, "learning_rate": 1.171128828106689e-05, "loss": 0.6669, "step": 4078 }, { "epoch": 0.28715241112284406, "grad_norm": 1.8388231992721558, "learning_rate": 1.1710107670610432e-05, "loss": 0.7346, "step": 4079 }, { "epoch": 0.28722280887011614, "grad_norm": 1.9619759321212769, "learning_rate": 1.170892681527121e-05, "loss": 0.7473, "step": 4080 }, { "epoch": 0.2872932066173882, "grad_norm": 1.7974724769592285, "learning_rate": 1.170774571511062e-05, "loss": 0.7634, "step": 4081 }, { "epoch": 0.28736360436466035, "grad_norm": 1.6995339393615723, "learning_rate": 1.1706564370190065e-05, "loss": 0.7059, "step": 4082 }, { "epoch": 0.28743400211193243, "grad_norm": 2.764345407485962, "learning_rate": 1.1705382780570965e-05, "loss": 0.6792, "step": 4083 }, { "epoch": 0.2875043998592045, "grad_norm": 1.7557584047317505, "learning_rate": 1.1704200946314754e-05, "loss": 0.7437, "step": 4084 }, { "epoch": 0.2875747976064766, "grad_norm": 2.3604981899261475, "learning_rate": 1.1703018867482876e-05, "loss": 0.7093, "step": 4085 }, { "epoch": 0.28764519535374866, "grad_norm": 1.8898390531539917, "learning_rate": 1.1701836544136788e-05, "loss": 0.7688, "step": 4086 }, { "epoch": 0.2877155931010208, "grad_norm": 2.221972942352295, "learning_rate": 1.1700653976337959e-05, "loss": 0.7259, "step": 4087 }, { "epoch": 0.28778599084829287, "grad_norm": 2.01739764213562, "learning_rate": 1.1699471164147873e-05, "loss": 0.8964, "step": 4088 }, { "epoch": 0.28785638859556495, "grad_norm": 1.6156858205795288, "learning_rate": 1.1698288107628023e-05, "loss": 0.7506, "step": 4089 }, { "epoch": 0.287926786342837, "grad_norm": 2.967663049697876, "learning_rate": 1.169710480683992e-05, "loss": 0.7556, "step": 4090 }, { "epoch": 0.2879971840901091, "grad_norm": 1.5559872388839722, "learning_rate": 1.1695921261845087e-05, "loss": 0.7526, "step": 4091 }, { "epoch": 0.2880675818373812, "grad_norm": 1.861844539642334, "learning_rate": 1.1694737472705054e-05, "loss": 0.6904, "step": 4092 }, { "epoch": 0.2881379795846533, "grad_norm": 1.8878048658370972, "learning_rate": 1.1693553439481365e-05, "loss": 0.7141, "step": 4093 }, { "epoch": 0.2882083773319254, "grad_norm": 2.221729040145874, "learning_rate": 1.1692369162235583e-05, "loss": 0.7905, "step": 4094 }, { "epoch": 0.28827877507919747, "grad_norm": 1.9637153148651123, "learning_rate": 1.169118464102928e-05, "loss": 0.6249, "step": 4095 }, { "epoch": 0.28834917282646955, "grad_norm": 2.8173906803131104, "learning_rate": 1.1689999875924037e-05, "loss": 0.6612, "step": 4096 }, { "epoch": 0.2884195705737416, "grad_norm": 1.6310161352157593, "learning_rate": 1.1688814866981455e-05, "loss": 0.6118, "step": 4097 }, { "epoch": 0.2884899683210137, "grad_norm": 2.0276479721069336, "learning_rate": 1.1687629614263142e-05, "loss": 0.6564, "step": 4098 }, { "epoch": 0.28856036606828583, "grad_norm": 1.793845295906067, "learning_rate": 1.1686444117830719e-05, "loss": 0.7482, "step": 4099 }, { "epoch": 0.2886307638155579, "grad_norm": 1.9694607257843018, "learning_rate": 1.1685258377745822e-05, "loss": 0.9192, "step": 4100 }, { "epoch": 0.28870116156283, "grad_norm": 2.019437789916992, "learning_rate": 1.1684072394070099e-05, "loss": 0.6934, "step": 4101 }, { "epoch": 0.28877155931010207, "grad_norm": 1.987646460533142, "learning_rate": 1.1682886166865209e-05, "loss": 0.7105, "step": 4102 }, { "epoch": 0.28884195705737414, "grad_norm": 1.940624713897705, "learning_rate": 1.1681699696192826e-05, "loss": 0.729, "step": 4103 }, { "epoch": 0.2889123548046463, "grad_norm": 1.83932626247406, "learning_rate": 1.1680512982114637e-05, "loss": 0.7269, "step": 4104 }, { "epoch": 0.28898275255191835, "grad_norm": 2.0080509185791016, "learning_rate": 1.167932602469234e-05, "loss": 0.6819, "step": 4105 }, { "epoch": 0.28905315029919043, "grad_norm": 1.8404968976974487, "learning_rate": 1.1678138823987642e-05, "loss": 0.6803, "step": 4106 }, { "epoch": 0.2891235480464625, "grad_norm": 1.9653533697128296, "learning_rate": 1.1676951380062273e-05, "loss": 0.8435, "step": 4107 }, { "epoch": 0.2891939457937346, "grad_norm": 2.887413501739502, "learning_rate": 1.1675763692977964e-05, "loss": 0.7546, "step": 4108 }, { "epoch": 0.28926434354100666, "grad_norm": 2.2821836471557617, "learning_rate": 1.1674575762796468e-05, "loss": 0.6127, "step": 4109 }, { "epoch": 0.2893347412882788, "grad_norm": 1.9545385837554932, "learning_rate": 1.1673387589579541e-05, "loss": 0.7108, "step": 4110 }, { "epoch": 0.2894051390355509, "grad_norm": 1.6905745267868042, "learning_rate": 1.1672199173388964e-05, "loss": 0.9445, "step": 4111 }, { "epoch": 0.28947553678282295, "grad_norm": 2.139296054840088, "learning_rate": 1.1671010514286518e-05, "loss": 0.6659, "step": 4112 }, { "epoch": 0.28954593453009503, "grad_norm": 2.143155097961426, "learning_rate": 1.1669821612334008e-05, "loss": 0.772, "step": 4113 }, { "epoch": 0.2896163322773671, "grad_norm": 1.9737684726715088, "learning_rate": 1.1668632467593242e-05, "loss": 0.7121, "step": 4114 }, { "epoch": 0.28968673002463924, "grad_norm": 1.7130206823349, "learning_rate": 1.1667443080126044e-05, "loss": 0.717, "step": 4115 }, { "epoch": 0.2897571277719113, "grad_norm": 1.9802327156066895, "learning_rate": 1.1666253449994256e-05, "loss": 0.8401, "step": 4116 }, { "epoch": 0.2898275255191834, "grad_norm": 2.0832936763763428, "learning_rate": 1.1665063577259723e-05, "loss": 0.7295, "step": 4117 }, { "epoch": 0.28989792326645547, "grad_norm": 1.9010951519012451, "learning_rate": 1.1663873461984312e-05, "loss": 0.7317, "step": 4118 }, { "epoch": 0.28996832101372755, "grad_norm": 1.6994128227233887, "learning_rate": 1.1662683104229892e-05, "loss": 0.8083, "step": 4119 }, { "epoch": 0.2900387187609996, "grad_norm": 1.8920795917510986, "learning_rate": 1.1661492504058358e-05, "loss": 0.8131, "step": 4120 }, { "epoch": 0.29010911650827176, "grad_norm": 2.2870404720306396, "learning_rate": 1.1660301661531607e-05, "loss": 0.6408, "step": 4121 }, { "epoch": 0.29017951425554384, "grad_norm": 1.8727452754974365, "learning_rate": 1.1659110576711551e-05, "loss": 0.6458, "step": 4122 }, { "epoch": 0.2902499120028159, "grad_norm": 2.091137170791626, "learning_rate": 1.1657919249660116e-05, "loss": 0.741, "step": 4123 }, { "epoch": 0.290320309750088, "grad_norm": 1.6697039604187012, "learning_rate": 1.1656727680439241e-05, "loss": 0.9141, "step": 4124 }, { "epoch": 0.29039070749736007, "grad_norm": 2.031008720397949, "learning_rate": 1.1655535869110876e-05, "loss": 0.646, "step": 4125 }, { "epoch": 0.29046110524463215, "grad_norm": 1.9340420961380005, "learning_rate": 1.1654343815736988e-05, "loss": 0.6402, "step": 4126 }, { "epoch": 0.2905315029919043, "grad_norm": 1.7493561506271362, "learning_rate": 1.1653151520379546e-05, "loss": 0.6812, "step": 4127 }, { "epoch": 0.29060190073917636, "grad_norm": 2.0646262168884277, "learning_rate": 1.1651958983100543e-05, "loss": 0.8557, "step": 4128 }, { "epoch": 0.29067229848644843, "grad_norm": 2.0039403438568115, "learning_rate": 1.165076620396198e-05, "loss": 0.6912, "step": 4129 }, { "epoch": 0.2907426962337205, "grad_norm": 1.7454853057861328, "learning_rate": 1.1649573183025872e-05, "loss": 0.6557, "step": 4130 }, { "epoch": 0.2908130939809926, "grad_norm": 1.8740512132644653, "learning_rate": 1.164837992035424e-05, "loss": 0.7983, "step": 4131 }, { "epoch": 0.2908834917282647, "grad_norm": 2.163442611694336, "learning_rate": 1.1647186416009128e-05, "loss": 0.746, "step": 4132 }, { "epoch": 0.2909538894755368, "grad_norm": 1.8060698509216309, "learning_rate": 1.1645992670052587e-05, "loss": 0.6734, "step": 4133 }, { "epoch": 0.2910242872228089, "grad_norm": 2.0593039989471436, "learning_rate": 1.1644798682546676e-05, "loss": 0.6386, "step": 4134 }, { "epoch": 0.29109468497008095, "grad_norm": 2.1667680740356445, "learning_rate": 1.1643604453553479e-05, "loss": 0.6903, "step": 4135 }, { "epoch": 0.29116508271735303, "grad_norm": 2.002692699432373, "learning_rate": 1.1642409983135077e-05, "loss": 0.7464, "step": 4136 }, { "epoch": 0.2912354804646251, "grad_norm": 1.7677979469299316, "learning_rate": 1.1641215271353577e-05, "loss": 0.7597, "step": 4137 }, { "epoch": 0.29130587821189724, "grad_norm": 2.087491989135742, "learning_rate": 1.1640020318271093e-05, "loss": 0.6604, "step": 4138 }, { "epoch": 0.2913762759591693, "grad_norm": 1.9816242456436157, "learning_rate": 1.163882512394975e-05, "loss": 0.6654, "step": 4139 }, { "epoch": 0.2914466737064414, "grad_norm": 1.800615668296814, "learning_rate": 1.1637629688451686e-05, "loss": 0.8, "step": 4140 }, { "epoch": 0.29151707145371347, "grad_norm": 2.0550265312194824, "learning_rate": 1.1636434011839055e-05, "loss": 0.8415, "step": 4141 }, { "epoch": 0.29158746920098555, "grad_norm": 1.549818754196167, "learning_rate": 1.1635238094174022e-05, "loss": 0.6821, "step": 4142 }, { "epoch": 0.2916578669482577, "grad_norm": 1.6564946174621582, "learning_rate": 1.1634041935518759e-05, "loss": 0.7022, "step": 4143 }, { "epoch": 0.29172826469552976, "grad_norm": 1.8567675352096558, "learning_rate": 1.1632845535935462e-05, "loss": 0.808, "step": 4144 }, { "epoch": 0.29179866244280184, "grad_norm": 2.301088809967041, "learning_rate": 1.163164889548633e-05, "loss": 0.7618, "step": 4145 }, { "epoch": 0.2918690601900739, "grad_norm": 4.633566856384277, "learning_rate": 1.1630452014233576e-05, "loss": 0.691, "step": 4146 }, { "epoch": 0.291939457937346, "grad_norm": 1.7803915739059448, "learning_rate": 1.1629254892239429e-05, "loss": 0.7834, "step": 4147 }, { "epoch": 0.29200985568461807, "grad_norm": 2.087001323699951, "learning_rate": 1.1628057529566126e-05, "loss": 0.6958, "step": 4148 }, { "epoch": 0.2920802534318902, "grad_norm": 1.8248859643936157, "learning_rate": 1.1626859926275924e-05, "loss": 0.7604, "step": 4149 }, { "epoch": 0.2921506511791623, "grad_norm": 1.8746949434280396, "learning_rate": 1.1625662082431084e-05, "loss": 0.7072, "step": 4150 }, { "epoch": 0.29222104892643436, "grad_norm": 1.7796236276626587, "learning_rate": 1.162446399809388e-05, "loss": 0.7331, "step": 4151 }, { "epoch": 0.29229144667370643, "grad_norm": 1.8626784086227417, "learning_rate": 1.1623265673326605e-05, "loss": 0.5887, "step": 4152 }, { "epoch": 0.2923618444209785, "grad_norm": 2.178006410598755, "learning_rate": 1.162206710819156e-05, "loss": 0.8102, "step": 4153 }, { "epoch": 0.2924322421682506, "grad_norm": 1.8477708101272583, "learning_rate": 1.1620868302751063e-05, "loss": 0.7255, "step": 4154 }, { "epoch": 0.2925026399155227, "grad_norm": 1.9449650049209595, "learning_rate": 1.1619669257067435e-05, "loss": 0.6901, "step": 4155 }, { "epoch": 0.2925730376627948, "grad_norm": 2.1117334365844727, "learning_rate": 1.1618469971203021e-05, "loss": 0.6986, "step": 4156 }, { "epoch": 0.2926434354100669, "grad_norm": 1.8282201290130615, "learning_rate": 1.161727044522017e-05, "loss": 0.7745, "step": 4157 }, { "epoch": 0.29271383315733895, "grad_norm": 1.9957741498947144, "learning_rate": 1.1616070679181246e-05, "loss": 0.6785, "step": 4158 }, { "epoch": 0.29278423090461103, "grad_norm": 1.9689723253250122, "learning_rate": 1.1614870673148627e-05, "loss": 0.6542, "step": 4159 }, { "epoch": 0.29285462865188316, "grad_norm": 1.7150307893753052, "learning_rate": 1.1613670427184704e-05, "loss": 0.642, "step": 4160 }, { "epoch": 0.29292502639915524, "grad_norm": 1.8678089380264282, "learning_rate": 1.1612469941351875e-05, "loss": 0.8182, "step": 4161 }, { "epoch": 0.2929954241464273, "grad_norm": 1.7645262479782104, "learning_rate": 1.1611269215712556e-05, "loss": 0.7082, "step": 4162 }, { "epoch": 0.2930658218936994, "grad_norm": 2.56716251373291, "learning_rate": 1.1610068250329176e-05, "loss": 0.7951, "step": 4163 }, { "epoch": 0.2931362196409715, "grad_norm": 2.0943613052368164, "learning_rate": 1.160886704526417e-05, "loss": 0.6985, "step": 4164 }, { "epoch": 0.29320661738824355, "grad_norm": 1.7305212020874023, "learning_rate": 1.1607665600579995e-05, "loss": 0.6468, "step": 4165 }, { "epoch": 0.2932770151355157, "grad_norm": 1.950076937675476, "learning_rate": 1.1606463916339113e-05, "loss": 0.7443, "step": 4166 }, { "epoch": 0.29334741288278776, "grad_norm": 1.5759586095809937, "learning_rate": 1.1605261992603998e-05, "loss": 0.677, "step": 4167 }, { "epoch": 0.29341781063005984, "grad_norm": 1.9350392818450928, "learning_rate": 1.1604059829437141e-05, "loss": 0.5944, "step": 4168 }, { "epoch": 0.2934882083773319, "grad_norm": 1.7253186702728271, "learning_rate": 1.1602857426901046e-05, "loss": 0.8057, "step": 4169 }, { "epoch": 0.293558606124604, "grad_norm": 1.7164831161499023, "learning_rate": 1.1601654785058223e-05, "loss": 0.6907, "step": 4170 }, { "epoch": 0.2936290038718761, "grad_norm": 1.6563913822174072, "learning_rate": 1.1600451903971201e-05, "loss": 0.8005, "step": 4171 }, { "epoch": 0.2936994016191482, "grad_norm": 1.7347633838653564, "learning_rate": 1.1599248783702517e-05, "loss": 0.7064, "step": 4172 }, { "epoch": 0.2937697993664203, "grad_norm": 1.6658159494400024, "learning_rate": 1.1598045424314725e-05, "loss": 0.6805, "step": 4173 }, { "epoch": 0.29384019711369236, "grad_norm": 1.8879221677780151, "learning_rate": 1.1596841825870386e-05, "loss": 0.7016, "step": 4174 }, { "epoch": 0.29391059486096444, "grad_norm": 1.8998405933380127, "learning_rate": 1.1595637988432076e-05, "loss": 0.6844, "step": 4175 }, { "epoch": 0.2939809926082365, "grad_norm": 1.745285153388977, "learning_rate": 1.1594433912062388e-05, "loss": 0.8175, "step": 4176 }, { "epoch": 0.29405139035550865, "grad_norm": 1.8201717138290405, "learning_rate": 1.1593229596823918e-05, "loss": 0.7172, "step": 4177 }, { "epoch": 0.2941217881027807, "grad_norm": 1.8032022714614868, "learning_rate": 1.159202504277928e-05, "loss": 0.8116, "step": 4178 }, { "epoch": 0.2941921858500528, "grad_norm": 1.7052494287490845, "learning_rate": 1.1590820249991106e-05, "loss": 0.7858, "step": 4179 }, { "epoch": 0.2942625835973249, "grad_norm": 1.9494284391403198, "learning_rate": 1.1589615218522026e-05, "loss": 0.8535, "step": 4180 }, { "epoch": 0.29433298134459696, "grad_norm": 1.773877739906311, "learning_rate": 1.1588409948434694e-05, "loss": 0.7451, "step": 4181 }, { "epoch": 0.29440337909186903, "grad_norm": 1.7964597940444946, "learning_rate": 1.1587204439791774e-05, "loss": 0.6487, "step": 4182 }, { "epoch": 0.29447377683914117, "grad_norm": 1.847320318222046, "learning_rate": 1.1585998692655943e-05, "loss": 0.7303, "step": 4183 }, { "epoch": 0.29454417458641324, "grad_norm": 1.9165500402450562, "learning_rate": 1.1584792707089885e-05, "loss": 0.8284, "step": 4184 }, { "epoch": 0.2946145723336853, "grad_norm": 1.9532299041748047, "learning_rate": 1.15835864831563e-05, "loss": 0.7341, "step": 4185 }, { "epoch": 0.2946849700809574, "grad_norm": 1.7085585594177246, "learning_rate": 1.1582380020917904e-05, "loss": 0.7204, "step": 4186 }, { "epoch": 0.2947553678282295, "grad_norm": 1.71831476688385, "learning_rate": 1.158117332043742e-05, "loss": 0.6988, "step": 4187 }, { "epoch": 0.2948257655755016, "grad_norm": 1.91995108127594, "learning_rate": 1.1579966381777588e-05, "loss": 0.6642, "step": 4188 }, { "epoch": 0.2948961633227737, "grad_norm": 1.81782865524292, "learning_rate": 1.1578759205001155e-05, "loss": 0.7302, "step": 4189 }, { "epoch": 0.29496656107004576, "grad_norm": 2.3147835731506348, "learning_rate": 1.1577551790170884e-05, "loss": 0.783, "step": 4190 }, { "epoch": 0.29503695881731784, "grad_norm": 2.050804853439331, "learning_rate": 1.1576344137349548e-05, "loss": 0.8828, "step": 4191 }, { "epoch": 0.2951073565645899, "grad_norm": 2.0761051177978516, "learning_rate": 1.1575136246599938e-05, "loss": 0.6539, "step": 4192 }, { "epoch": 0.295177754311862, "grad_norm": 2.17378830909729, "learning_rate": 1.1573928117984852e-05, "loss": 0.7561, "step": 4193 }, { "epoch": 0.29524815205913413, "grad_norm": 1.427598476409912, "learning_rate": 1.1572719751567097e-05, "loss": 0.586, "step": 4194 }, { "epoch": 0.2953185498064062, "grad_norm": 1.9893481731414795, "learning_rate": 1.1571511147409506e-05, "loss": 0.7115, "step": 4195 }, { "epoch": 0.2953889475536783, "grad_norm": 1.9446420669555664, "learning_rate": 1.1570302305574905e-05, "loss": 0.7192, "step": 4196 }, { "epoch": 0.29545934530095036, "grad_norm": 2.028529167175293, "learning_rate": 1.1569093226126152e-05, "loss": 0.6063, "step": 4197 }, { "epoch": 0.29552974304822244, "grad_norm": 2.4359123706817627, "learning_rate": 1.1567883909126102e-05, "loss": 0.7819, "step": 4198 }, { "epoch": 0.29560014079549457, "grad_norm": 2.1275901794433594, "learning_rate": 1.156667435463763e-05, "loss": 0.7689, "step": 4199 }, { "epoch": 0.29567053854276665, "grad_norm": 1.8817106485366821, "learning_rate": 1.1565464562723624e-05, "loss": 0.7682, "step": 4200 }, { "epoch": 0.2957409362900387, "grad_norm": 2.0288326740264893, "learning_rate": 1.156425453344698e-05, "loss": 0.6422, "step": 4201 }, { "epoch": 0.2958113340373108, "grad_norm": 2.18570876121521, "learning_rate": 1.156304426687061e-05, "loss": 0.7223, "step": 4202 }, { "epoch": 0.2958817317845829, "grad_norm": 2.2534477710723877, "learning_rate": 1.1561833763057435e-05, "loss": 0.7341, "step": 4203 }, { "epoch": 0.29595212953185496, "grad_norm": 1.8023364543914795, "learning_rate": 1.1560623022070392e-05, "loss": 0.7043, "step": 4204 }, { "epoch": 0.2960225272791271, "grad_norm": 2.182415246963501, "learning_rate": 1.1559412043972426e-05, "loss": 0.6867, "step": 4205 }, { "epoch": 0.29609292502639917, "grad_norm": 1.9917776584625244, "learning_rate": 1.1558200828826501e-05, "loss": 0.7112, "step": 4206 }, { "epoch": 0.29616332277367124, "grad_norm": 1.6527243852615356, "learning_rate": 1.1556989376695587e-05, "loss": 0.6748, "step": 4207 }, { "epoch": 0.2962337205209433, "grad_norm": 1.7569165229797363, "learning_rate": 1.1555777687642667e-05, "loss": 0.5539, "step": 4208 }, { "epoch": 0.2963041182682154, "grad_norm": 2.6174957752227783, "learning_rate": 1.1554565761730738e-05, "loss": 0.8157, "step": 4209 }, { "epoch": 0.2963745160154875, "grad_norm": 2.1233229637145996, "learning_rate": 1.1553353599022812e-05, "loss": 0.7694, "step": 4210 }, { "epoch": 0.2964449137627596, "grad_norm": 2.2643871307373047, "learning_rate": 1.1552141199581906e-05, "loss": 0.6073, "step": 4211 }, { "epoch": 0.2965153115100317, "grad_norm": 1.6998854875564575, "learning_rate": 1.155092856347106e-05, "loss": 0.7159, "step": 4212 }, { "epoch": 0.29658570925730376, "grad_norm": 2.094691276550293, "learning_rate": 1.1549715690753315e-05, "loss": 0.6835, "step": 4213 }, { "epoch": 0.29665610700457584, "grad_norm": 1.8556499481201172, "learning_rate": 1.154850258149173e-05, "loss": 0.8128, "step": 4214 }, { "epoch": 0.2967265047518479, "grad_norm": 1.7647641897201538, "learning_rate": 1.1547289235749376e-05, "loss": 0.7159, "step": 4215 }, { "epoch": 0.29679690249912005, "grad_norm": 2.043081045150757, "learning_rate": 1.154607565358934e-05, "loss": 0.6519, "step": 4216 }, { "epoch": 0.29686730024639213, "grad_norm": 1.9042656421661377, "learning_rate": 1.1544861835074709e-05, "loss": 0.7325, "step": 4217 }, { "epoch": 0.2969376979936642, "grad_norm": 1.7995637655258179, "learning_rate": 1.1543647780268597e-05, "loss": 0.6594, "step": 4218 }, { "epoch": 0.2970080957409363, "grad_norm": 2.0484862327575684, "learning_rate": 1.1542433489234123e-05, "loss": 0.7238, "step": 4219 }, { "epoch": 0.29707849348820836, "grad_norm": 1.6586052179336548, "learning_rate": 1.1541218962034415e-05, "loss": 0.6797, "step": 4220 }, { "epoch": 0.29714889123548044, "grad_norm": 2.2509992122650146, "learning_rate": 1.1540004198732622e-05, "loss": 0.8347, "step": 4221 }, { "epoch": 0.29721928898275257, "grad_norm": 2.2073113918304443, "learning_rate": 1.15387891993919e-05, "loss": 0.7754, "step": 4222 }, { "epoch": 0.29728968673002465, "grad_norm": 1.8321969509124756, "learning_rate": 1.1537573964075417e-05, "loss": 0.6962, "step": 4223 }, { "epoch": 0.2973600844772967, "grad_norm": 2.197364330291748, "learning_rate": 1.1536358492846351e-05, "loss": 0.7485, "step": 4224 }, { "epoch": 0.2974304822245688, "grad_norm": 1.976435899734497, "learning_rate": 1.1535142785767902e-05, "loss": 0.7007, "step": 4225 }, { "epoch": 0.2975008799718409, "grad_norm": 1.7424243688583374, "learning_rate": 1.1533926842903271e-05, "loss": 0.7407, "step": 4226 }, { "epoch": 0.297571277719113, "grad_norm": 1.8767036199569702, "learning_rate": 1.1532710664315678e-05, "loss": 0.6303, "step": 4227 }, { "epoch": 0.2976416754663851, "grad_norm": 1.6602106094360352, "learning_rate": 1.1531494250068353e-05, "loss": 0.6469, "step": 4228 }, { "epoch": 0.29771207321365717, "grad_norm": 1.9886139631271362, "learning_rate": 1.1530277600224537e-05, "loss": 0.6584, "step": 4229 }, { "epoch": 0.29778247096092925, "grad_norm": 1.772824764251709, "learning_rate": 1.1529060714847484e-05, "loss": 0.6713, "step": 4230 }, { "epoch": 0.2978528687082013, "grad_norm": 1.763185739517212, "learning_rate": 1.1527843594000466e-05, "loss": 0.8575, "step": 4231 }, { "epoch": 0.2979232664554734, "grad_norm": 2.180553674697876, "learning_rate": 1.1526626237746758e-05, "loss": 0.6526, "step": 4232 }, { "epoch": 0.29799366420274553, "grad_norm": 1.6845239400863647, "learning_rate": 1.1525408646149651e-05, "loss": 0.7712, "step": 4233 }, { "epoch": 0.2980640619500176, "grad_norm": 1.873063325881958, "learning_rate": 1.1524190819272452e-05, "loss": 0.6805, "step": 4234 }, { "epoch": 0.2981344596972897, "grad_norm": 2.042973756790161, "learning_rate": 1.1522972757178472e-05, "loss": 0.7536, "step": 4235 }, { "epoch": 0.29820485744456177, "grad_norm": 2.2274844646453857, "learning_rate": 1.1521754459931045e-05, "loss": 0.67, "step": 4236 }, { "epoch": 0.29827525519183384, "grad_norm": 1.9070934057235718, "learning_rate": 1.1520535927593507e-05, "loss": 0.6857, "step": 4237 }, { "epoch": 0.2983456529391059, "grad_norm": 2.415416717529297, "learning_rate": 1.1519317160229213e-05, "loss": 0.7473, "step": 4238 }, { "epoch": 0.29841605068637805, "grad_norm": 1.8746765851974487, "learning_rate": 1.1518098157901524e-05, "loss": 0.6734, "step": 4239 }, { "epoch": 0.29848644843365013, "grad_norm": 1.9516246318817139, "learning_rate": 1.1516878920673824e-05, "loss": 0.6166, "step": 4240 }, { "epoch": 0.2985568461809222, "grad_norm": 1.9970767498016357, "learning_rate": 1.1515659448609494e-05, "loss": 0.8967, "step": 4241 }, { "epoch": 0.2986272439281943, "grad_norm": 1.9818624258041382, "learning_rate": 1.1514439741771942e-05, "loss": 0.747, "step": 4242 }, { "epoch": 0.29869764167546636, "grad_norm": 1.9874707460403442, "learning_rate": 1.151321980022458e-05, "loss": 0.6131, "step": 4243 }, { "epoch": 0.2987680394227385, "grad_norm": 2.037710666656494, "learning_rate": 1.1511999624030832e-05, "loss": 0.7749, "step": 4244 }, { "epoch": 0.2988384371700106, "grad_norm": 1.9172910451889038, "learning_rate": 1.1510779213254139e-05, "loss": 0.7046, "step": 4245 }, { "epoch": 0.29890883491728265, "grad_norm": 1.749405026435852, "learning_rate": 1.1509558567957949e-05, "loss": 0.6931, "step": 4246 }, { "epoch": 0.29897923266455473, "grad_norm": 1.7065517902374268, "learning_rate": 1.1508337688205724e-05, "loss": 0.6921, "step": 4247 }, { "epoch": 0.2990496304118268, "grad_norm": 1.6949689388275146, "learning_rate": 1.150711657406094e-05, "loss": 0.6667, "step": 4248 }, { "epoch": 0.2991200281590989, "grad_norm": 1.9782726764678955, "learning_rate": 1.1505895225587084e-05, "loss": 0.729, "step": 4249 }, { "epoch": 0.299190425906371, "grad_norm": 1.8376926183700562, "learning_rate": 1.1504673642847652e-05, "loss": 0.7957, "step": 4250 }, { "epoch": 0.2992608236536431, "grad_norm": 1.9028868675231934, "learning_rate": 1.150345182590616e-05, "loss": 0.7499, "step": 4251 }, { "epoch": 0.29933122140091517, "grad_norm": 2.0015385150909424, "learning_rate": 1.1502229774826129e-05, "loss": 0.7613, "step": 4252 }, { "epoch": 0.29940161914818725, "grad_norm": 1.9646450281143188, "learning_rate": 1.1501007489671093e-05, "loss": 0.7153, "step": 4253 }, { "epoch": 0.2994720168954593, "grad_norm": 1.708627700805664, "learning_rate": 1.1499784970504602e-05, "loss": 0.6762, "step": 4254 }, { "epoch": 0.29954241464273146, "grad_norm": 1.8956190347671509, "learning_rate": 1.1498562217390216e-05, "loss": 0.7557, "step": 4255 }, { "epoch": 0.29961281239000354, "grad_norm": 2.0169382095336914, "learning_rate": 1.1497339230391506e-05, "loss": 0.7313, "step": 4256 }, { "epoch": 0.2996832101372756, "grad_norm": 1.7282469272613525, "learning_rate": 1.1496116009572054e-05, "loss": 0.7057, "step": 4257 }, { "epoch": 0.2997536078845477, "grad_norm": 2.005577564239502, "learning_rate": 1.149489255499546e-05, "loss": 0.856, "step": 4258 }, { "epoch": 0.29982400563181977, "grad_norm": 1.8666224479675293, "learning_rate": 1.1493668866725334e-05, "loss": 0.7105, "step": 4259 }, { "epoch": 0.29989440337909185, "grad_norm": 1.6316384077072144, "learning_rate": 1.149244494482529e-05, "loss": 0.628, "step": 4260 }, { "epoch": 0.299964801126364, "grad_norm": 2.8094582557678223, "learning_rate": 1.1491220789358968e-05, "loss": 0.7069, "step": 4261 }, { "epoch": 0.30003519887363606, "grad_norm": 2.3220767974853516, "learning_rate": 1.1489996400390007e-05, "loss": 0.7615, "step": 4262 }, { "epoch": 0.30010559662090813, "grad_norm": 1.971850872039795, "learning_rate": 1.1488771777982067e-05, "loss": 0.7097, "step": 4263 }, { "epoch": 0.3001759943681802, "grad_norm": 1.7892301082611084, "learning_rate": 1.148754692219882e-05, "loss": 0.7233, "step": 4264 }, { "epoch": 0.3002463921154523, "grad_norm": 1.6804109811782837, "learning_rate": 1.1486321833103942e-05, "loss": 0.7664, "step": 4265 }, { "epoch": 0.3003167898627244, "grad_norm": 1.9577311277389526, "learning_rate": 1.1485096510761128e-05, "loss": 0.6451, "step": 4266 }, { "epoch": 0.3003871876099965, "grad_norm": 1.7580907344818115, "learning_rate": 1.1483870955234086e-05, "loss": 0.6583, "step": 4267 }, { "epoch": 0.3004575853572686, "grad_norm": 1.8449066877365112, "learning_rate": 1.1482645166586531e-05, "loss": 0.7346, "step": 4268 }, { "epoch": 0.30052798310454065, "grad_norm": 1.741262435913086, "learning_rate": 1.1481419144882194e-05, "loss": 0.6865, "step": 4269 }, { "epoch": 0.30059838085181273, "grad_norm": 1.6650372743606567, "learning_rate": 1.148019289018482e-05, "loss": 0.6263, "step": 4270 }, { "epoch": 0.3006687785990848, "grad_norm": 1.4766508340835571, "learning_rate": 1.1478966402558158e-05, "loss": 0.7805, "step": 4271 }, { "epoch": 0.30073917634635694, "grad_norm": 1.7098069190979004, "learning_rate": 1.1477739682065976e-05, "loss": 0.7468, "step": 4272 }, { "epoch": 0.300809574093629, "grad_norm": 2.083409547805786, "learning_rate": 1.1476512728772052e-05, "loss": 0.7059, "step": 4273 }, { "epoch": 0.3008799718409011, "grad_norm": 1.970391035079956, "learning_rate": 1.1475285542740178e-05, "loss": 0.7215, "step": 4274 }, { "epoch": 0.30095036958817317, "grad_norm": 1.8966392278671265, "learning_rate": 1.1474058124034154e-05, "loss": 0.6677, "step": 4275 }, { "epoch": 0.30102076733544525, "grad_norm": 1.7939531803131104, "learning_rate": 1.1472830472717799e-05, "loss": 0.6206, "step": 4276 }, { "epoch": 0.3010911650827173, "grad_norm": 2.028278350830078, "learning_rate": 1.1471602588854936e-05, "loss": 0.7488, "step": 4277 }, { "epoch": 0.30116156282998946, "grad_norm": 2.0844414234161377, "learning_rate": 1.1470374472509405e-05, "loss": 0.7145, "step": 4278 }, { "epoch": 0.30123196057726154, "grad_norm": 2.0381007194519043, "learning_rate": 1.1469146123745055e-05, "loss": 0.7553, "step": 4279 }, { "epoch": 0.3013023583245336, "grad_norm": 1.7207715511322021, "learning_rate": 1.1467917542625753e-05, "loss": 0.7665, "step": 4280 }, { "epoch": 0.3013727560718057, "grad_norm": 1.938336730003357, "learning_rate": 1.1466688729215369e-05, "loss": 0.6937, "step": 4281 }, { "epoch": 0.30144315381907777, "grad_norm": 1.9975801706314087, "learning_rate": 1.1465459683577794e-05, "loss": 0.661, "step": 4282 }, { "epoch": 0.3015135515663499, "grad_norm": 1.7579379081726074, "learning_rate": 1.1464230405776925e-05, "loss": 0.6149, "step": 4283 }, { "epoch": 0.301583949313622, "grad_norm": 1.633048415184021, "learning_rate": 1.1463000895876675e-05, "loss": 0.7851, "step": 4284 }, { "epoch": 0.30165434706089406, "grad_norm": 1.9303456544876099, "learning_rate": 1.1461771153940967e-05, "loss": 0.8619, "step": 4285 }, { "epoch": 0.30172474480816613, "grad_norm": 1.7720756530761719, "learning_rate": 1.1460541180033734e-05, "loss": 0.7095, "step": 4286 }, { "epoch": 0.3017951425554382, "grad_norm": 1.850172996520996, "learning_rate": 1.1459310974218927e-05, "loss": 0.7403, "step": 4287 }, { "epoch": 0.3018655403027103, "grad_norm": 1.7154278755187988, "learning_rate": 1.1458080536560505e-05, "loss": 0.7715, "step": 4288 }, { "epoch": 0.3019359380499824, "grad_norm": 1.7996597290039062, "learning_rate": 1.1456849867122435e-05, "loss": 0.8074, "step": 4289 }, { "epoch": 0.3020063357972545, "grad_norm": 1.8695168495178223, "learning_rate": 1.1455618965968706e-05, "loss": 0.721, "step": 4290 }, { "epoch": 0.3020767335445266, "grad_norm": 2.522082567214966, "learning_rate": 1.1454387833163312e-05, "loss": 0.6756, "step": 4291 }, { "epoch": 0.30214713129179865, "grad_norm": 1.7567757368087769, "learning_rate": 1.145315646877026e-05, "loss": 0.8454, "step": 4292 }, { "epoch": 0.30221752903907073, "grad_norm": 1.7004073858261108, "learning_rate": 1.1451924872853571e-05, "loss": 0.8564, "step": 4293 }, { "epoch": 0.30228792678634286, "grad_norm": 2.590108633041382, "learning_rate": 1.1450693045477273e-05, "loss": 0.7542, "step": 4294 }, { "epoch": 0.30235832453361494, "grad_norm": 1.9655492305755615, "learning_rate": 1.1449460986705416e-05, "loss": 0.8674, "step": 4295 }, { "epoch": 0.302428722280887, "grad_norm": 1.7174245119094849, "learning_rate": 1.144822869660205e-05, "loss": 0.6721, "step": 4296 }, { "epoch": 0.3024991200281591, "grad_norm": 1.8002259731292725, "learning_rate": 1.1446996175231249e-05, "loss": 0.8115, "step": 4297 }, { "epoch": 0.3025695177754312, "grad_norm": 1.649996280670166, "learning_rate": 1.1445763422657088e-05, "loss": 0.6907, "step": 4298 }, { "epoch": 0.30263991552270325, "grad_norm": 1.7571666240692139, "learning_rate": 1.144453043894366e-05, "loss": 0.7523, "step": 4299 }, { "epoch": 0.3027103132699754, "grad_norm": 2.158149003982544, "learning_rate": 1.1443297224155068e-05, "loss": 0.7329, "step": 4300 }, { "epoch": 0.30278071101724746, "grad_norm": 1.8641117811203003, "learning_rate": 1.1442063778355432e-05, "loss": 0.6249, "step": 4301 }, { "epoch": 0.30285110876451954, "grad_norm": 1.801475167274475, "learning_rate": 1.1440830101608874e-05, "loss": 0.8757, "step": 4302 }, { "epoch": 0.3029215065117916, "grad_norm": 2.071516990661621, "learning_rate": 1.143959619397954e-05, "loss": 0.7191, "step": 4303 }, { "epoch": 0.3029919042590637, "grad_norm": 1.8381575345993042, "learning_rate": 1.1438362055531576e-05, "loss": 0.6116, "step": 4304 }, { "epoch": 0.30306230200633577, "grad_norm": 1.7437951564788818, "learning_rate": 1.1437127686329152e-05, "loss": 0.7939, "step": 4305 }, { "epoch": 0.3031326997536079, "grad_norm": 1.7240639925003052, "learning_rate": 1.143589308643644e-05, "loss": 0.711, "step": 4306 }, { "epoch": 0.30320309750088, "grad_norm": 1.564963936805725, "learning_rate": 1.1434658255917628e-05, "loss": 0.8158, "step": 4307 }, { "epoch": 0.30327349524815206, "grad_norm": 1.801493525505066, "learning_rate": 1.1433423194836918e-05, "loss": 0.7385, "step": 4308 }, { "epoch": 0.30334389299542414, "grad_norm": 1.7981945276260376, "learning_rate": 1.1432187903258521e-05, "loss": 0.6779, "step": 4309 }, { "epoch": 0.3034142907426962, "grad_norm": 1.9064526557922363, "learning_rate": 1.143095238124666e-05, "loss": 0.6856, "step": 4310 }, { "epoch": 0.30348468848996835, "grad_norm": 1.7649400234222412, "learning_rate": 1.1429716628865573e-05, "loss": 0.6884, "step": 4311 }, { "epoch": 0.3035550862372404, "grad_norm": 1.8437376022338867, "learning_rate": 1.1428480646179507e-05, "loss": 0.682, "step": 4312 }, { "epoch": 0.3036254839845125, "grad_norm": 1.8373726606369019, "learning_rate": 1.1427244433252721e-05, "loss": 0.7228, "step": 4313 }, { "epoch": 0.3036958817317846, "grad_norm": 1.9337040185928345, "learning_rate": 1.1426007990149489e-05, "loss": 0.7465, "step": 4314 }, { "epoch": 0.30376627947905666, "grad_norm": 2.076680898666382, "learning_rate": 1.142477131693409e-05, "loss": 0.6002, "step": 4315 }, { "epoch": 0.30383667722632873, "grad_norm": 1.6452783346176147, "learning_rate": 1.1423534413670823e-05, "loss": 0.6466, "step": 4316 }, { "epoch": 0.30390707497360087, "grad_norm": 2.4098012447357178, "learning_rate": 1.1422297280423997e-05, "loss": 0.609, "step": 4317 }, { "epoch": 0.30397747272087294, "grad_norm": 2.0799379348754883, "learning_rate": 1.1421059917257933e-05, "loss": 0.7568, "step": 4318 }, { "epoch": 0.304047870468145, "grad_norm": 1.8928236961364746, "learning_rate": 1.1419822324236954e-05, "loss": 0.745, "step": 4319 }, { "epoch": 0.3041182682154171, "grad_norm": 1.962492823600769, "learning_rate": 1.1418584501425414e-05, "loss": 0.6508, "step": 4320 }, { "epoch": 0.3041886659626892, "grad_norm": 2.0172646045684814, "learning_rate": 1.1417346448887664e-05, "loss": 0.7898, "step": 4321 }, { "epoch": 0.3042590637099613, "grad_norm": 1.7276333570480347, "learning_rate": 1.141610816668807e-05, "loss": 0.7952, "step": 4322 }, { "epoch": 0.3043294614572334, "grad_norm": 2.5904393196105957, "learning_rate": 1.1414869654891013e-05, "loss": 0.756, "step": 4323 }, { "epoch": 0.30439985920450546, "grad_norm": 1.8802542686462402, "learning_rate": 1.1413630913560886e-05, "loss": 0.7895, "step": 4324 }, { "epoch": 0.30447025695177754, "grad_norm": 1.5468106269836426, "learning_rate": 1.1412391942762087e-05, "loss": 0.6655, "step": 4325 }, { "epoch": 0.3045406546990496, "grad_norm": 1.8192750215530396, "learning_rate": 1.1411152742559038e-05, "loss": 0.7719, "step": 4326 }, { "epoch": 0.3046110524463217, "grad_norm": 1.7130701541900635, "learning_rate": 1.1409913313016162e-05, "loss": 0.7601, "step": 4327 }, { "epoch": 0.30468145019359383, "grad_norm": 1.583688497543335, "learning_rate": 1.1408673654197898e-05, "loss": 0.7326, "step": 4328 }, { "epoch": 0.3047518479408659, "grad_norm": 1.796613097190857, "learning_rate": 1.14074337661687e-05, "loss": 0.7714, "step": 4329 }, { "epoch": 0.304822245688138, "grad_norm": 1.7656389474868774, "learning_rate": 1.1406193648993026e-05, "loss": 0.6384, "step": 4330 }, { "epoch": 0.30489264343541006, "grad_norm": 2.1797616481781006, "learning_rate": 1.1404953302735357e-05, "loss": 0.6813, "step": 4331 }, { "epoch": 0.30496304118268214, "grad_norm": 1.7639738321304321, "learning_rate": 1.1403712727460174e-05, "loss": 0.6994, "step": 4332 }, { "epoch": 0.3050334389299542, "grad_norm": 2.5791015625, "learning_rate": 1.1402471923231979e-05, "loss": 0.6859, "step": 4333 }, { "epoch": 0.30510383667722635, "grad_norm": 2.266111373901367, "learning_rate": 1.1401230890115281e-05, "loss": 0.7917, "step": 4334 }, { "epoch": 0.3051742344244984, "grad_norm": 1.8612096309661865, "learning_rate": 1.1399989628174605e-05, "loss": 0.6277, "step": 4335 }, { "epoch": 0.3052446321717705, "grad_norm": 1.6100518703460693, "learning_rate": 1.139874813747448e-05, "loss": 0.6793, "step": 4336 }, { "epoch": 0.3053150299190426, "grad_norm": 1.6753222942352295, "learning_rate": 1.139750641807946e-05, "loss": 0.6409, "step": 4337 }, { "epoch": 0.30538542766631466, "grad_norm": 1.960266351699829, "learning_rate": 1.1396264470054096e-05, "loss": 0.7386, "step": 4338 }, { "epoch": 0.3054558254135868, "grad_norm": 1.9386405944824219, "learning_rate": 1.1395022293462962e-05, "loss": 0.8227, "step": 4339 }, { "epoch": 0.30552622316085887, "grad_norm": 2.245473861694336, "learning_rate": 1.1393779888370637e-05, "loss": 0.6942, "step": 4340 }, { "epoch": 0.30559662090813094, "grad_norm": 3.9977176189422607, "learning_rate": 1.1392537254841717e-05, "loss": 0.7536, "step": 4341 }, { "epoch": 0.305667018655403, "grad_norm": 1.6711347103118896, "learning_rate": 1.1391294392940806e-05, "loss": 0.7296, "step": 4342 }, { "epoch": 0.3057374164026751, "grad_norm": 2.239342451095581, "learning_rate": 1.1390051302732524e-05, "loss": 0.7502, "step": 4343 }, { "epoch": 0.3058078141499472, "grad_norm": 1.651098608970642, "learning_rate": 1.1388807984281498e-05, "loss": 0.628, "step": 4344 }, { "epoch": 0.3058782118972193, "grad_norm": 1.8950421810150146, "learning_rate": 1.1387564437652372e-05, "loss": 0.7798, "step": 4345 }, { "epoch": 0.3059486096444914, "grad_norm": 1.8111618757247925, "learning_rate": 1.1386320662909795e-05, "loss": 0.671, "step": 4346 }, { "epoch": 0.30601900739176346, "grad_norm": 1.6698771715164185, "learning_rate": 1.1385076660118436e-05, "loss": 0.649, "step": 4347 }, { "epoch": 0.30608940513903554, "grad_norm": 1.5967646837234497, "learning_rate": 1.1383832429342968e-05, "loss": 0.8678, "step": 4348 }, { "epoch": 0.3061598028863076, "grad_norm": 1.6762512922286987, "learning_rate": 1.1382587970648083e-05, "loss": 0.7539, "step": 4349 }, { "epoch": 0.30623020063357975, "grad_norm": 2.094326972961426, "learning_rate": 1.1381343284098482e-05, "loss": 0.7553, "step": 4350 }, { "epoch": 0.30630059838085183, "grad_norm": 1.8420369625091553, "learning_rate": 1.1380098369758873e-05, "loss": 0.7693, "step": 4351 }, { "epoch": 0.3063709961281239, "grad_norm": 2.479412794113159, "learning_rate": 1.1378853227693985e-05, "loss": 0.6938, "step": 4352 }, { "epoch": 0.306441393875396, "grad_norm": 1.6419183015823364, "learning_rate": 1.137760785796855e-05, "loss": 0.6573, "step": 4353 }, { "epoch": 0.30651179162266806, "grad_norm": 1.6391011476516724, "learning_rate": 1.1376362260647318e-05, "loss": 0.6131, "step": 4354 }, { "epoch": 0.30658218936994014, "grad_norm": 1.8573042154312134, "learning_rate": 1.137511643579505e-05, "loss": 0.7261, "step": 4355 }, { "epoch": 0.30665258711721227, "grad_norm": 1.9858543872833252, "learning_rate": 1.1373870383476514e-05, "loss": 0.774, "step": 4356 }, { "epoch": 0.30672298486448435, "grad_norm": 1.6675422191619873, "learning_rate": 1.1372624103756497e-05, "loss": 0.6758, "step": 4357 }, { "epoch": 0.3067933826117564, "grad_norm": 1.7947354316711426, "learning_rate": 1.1371377596699793e-05, "loss": 0.7123, "step": 4358 }, { "epoch": 0.3068637803590285, "grad_norm": 2.1095118522644043, "learning_rate": 1.137013086237121e-05, "loss": 0.6894, "step": 4359 }, { "epoch": 0.3069341781063006, "grad_norm": 2.0683975219726562, "learning_rate": 1.1368883900835563e-05, "loss": 0.695, "step": 4360 }, { "epoch": 0.30700457585357266, "grad_norm": 2.146839141845703, "learning_rate": 1.1367636712157685e-05, "loss": 0.7407, "step": 4361 }, { "epoch": 0.3070749736008448, "grad_norm": 2.200083017349243, "learning_rate": 1.1366389296402422e-05, "loss": 0.6493, "step": 4362 }, { "epoch": 0.30714537134811687, "grad_norm": 2.5257480144500732, "learning_rate": 1.1365141653634625e-05, "loss": 0.823, "step": 4363 }, { "epoch": 0.30721576909538895, "grad_norm": 1.761011004447937, "learning_rate": 1.1363893783919158e-05, "loss": 0.7082, "step": 4364 }, { "epoch": 0.307286166842661, "grad_norm": 1.9247416257858276, "learning_rate": 1.13626456873209e-05, "loss": 0.664, "step": 4365 }, { "epoch": 0.3073565645899331, "grad_norm": 2.2318787574768066, "learning_rate": 1.1361397363904744e-05, "loss": 0.7351, "step": 4366 }, { "epoch": 0.30742696233720523, "grad_norm": 2.5091090202331543, "learning_rate": 1.1360148813735589e-05, "loss": 0.731, "step": 4367 }, { "epoch": 0.3074973600844773, "grad_norm": 1.7051278352737427, "learning_rate": 1.1358900036878346e-05, "loss": 0.6555, "step": 4368 }, { "epoch": 0.3075677578317494, "grad_norm": 1.8821067810058594, "learning_rate": 1.1357651033397946e-05, "loss": 0.6845, "step": 4369 }, { "epoch": 0.30763815557902147, "grad_norm": 2.4951367378234863, "learning_rate": 1.1356401803359318e-05, "loss": 0.7266, "step": 4370 }, { "epoch": 0.30770855332629354, "grad_norm": 2.028353691101074, "learning_rate": 1.1355152346827418e-05, "loss": 0.7387, "step": 4371 }, { "epoch": 0.3077789510735656, "grad_norm": 1.8838082551956177, "learning_rate": 1.1353902663867202e-05, "loss": 0.7879, "step": 4372 }, { "epoch": 0.30784934882083775, "grad_norm": 1.768048882484436, "learning_rate": 1.1352652754543644e-05, "loss": 0.5802, "step": 4373 }, { "epoch": 0.30791974656810983, "grad_norm": 1.8610402345657349, "learning_rate": 1.1351402618921728e-05, "loss": 0.7722, "step": 4374 }, { "epoch": 0.3079901443153819, "grad_norm": 1.6052199602127075, "learning_rate": 1.1350152257066446e-05, "loss": 0.6974, "step": 4375 }, { "epoch": 0.308060542062654, "grad_norm": 2.05703067779541, "learning_rate": 1.134890166904281e-05, "loss": 0.7872, "step": 4376 }, { "epoch": 0.30813093980992606, "grad_norm": 1.7768021821975708, "learning_rate": 1.134765085491584e-05, "loss": 0.7272, "step": 4377 }, { "epoch": 0.3082013375571982, "grad_norm": 2.476152181625366, "learning_rate": 1.134639981475056e-05, "loss": 0.6679, "step": 4378 }, { "epoch": 0.3082717353044703, "grad_norm": 2.0078017711639404, "learning_rate": 1.1345148548612022e-05, "loss": 0.7534, "step": 4379 }, { "epoch": 0.30834213305174235, "grad_norm": 1.8455250263214111, "learning_rate": 1.1343897056565274e-05, "loss": 0.7022, "step": 4380 }, { "epoch": 0.30841253079901443, "grad_norm": 2.179657459259033, "learning_rate": 1.1342645338675384e-05, "loss": 0.639, "step": 4381 }, { "epoch": 0.3084829285462865, "grad_norm": 1.780776858329773, "learning_rate": 1.134139339500743e-05, "loss": 0.8119, "step": 4382 }, { "epoch": 0.3085533262935586, "grad_norm": 3.017428159713745, "learning_rate": 1.13401412256265e-05, "loss": 0.6308, "step": 4383 }, { "epoch": 0.3086237240408307, "grad_norm": 1.898450255393982, "learning_rate": 1.1338888830597697e-05, "loss": 0.8203, "step": 4384 }, { "epoch": 0.3086941217881028, "grad_norm": 1.474715232849121, "learning_rate": 1.1337636209986137e-05, "loss": 0.7641, "step": 4385 }, { "epoch": 0.30876451953537487, "grad_norm": 1.704830527305603, "learning_rate": 1.133638336385694e-05, "loss": 0.8386, "step": 4386 }, { "epoch": 0.30883491728264695, "grad_norm": 1.6816809177398682, "learning_rate": 1.1335130292275245e-05, "loss": 0.6231, "step": 4387 }, { "epoch": 0.308905315029919, "grad_norm": 1.5998969078063965, "learning_rate": 1.1333876995306201e-05, "loss": 0.7495, "step": 4388 }, { "epoch": 0.3089757127771911, "grad_norm": 1.7035197019577026, "learning_rate": 1.1332623473014967e-05, "loss": 0.7131, "step": 4389 }, { "epoch": 0.30904611052446324, "grad_norm": 1.8200337886810303, "learning_rate": 1.1331369725466712e-05, "loss": 0.7429, "step": 4390 }, { "epoch": 0.3091165082717353, "grad_norm": 1.644662618637085, "learning_rate": 1.1330115752726624e-05, "loss": 0.7655, "step": 4391 }, { "epoch": 0.3091869060190074, "grad_norm": 1.674791693687439, "learning_rate": 1.1328861554859897e-05, "loss": 0.8328, "step": 4392 }, { "epoch": 0.30925730376627947, "grad_norm": 1.8064918518066406, "learning_rate": 1.1327607131931737e-05, "loss": 0.7444, "step": 4393 }, { "epoch": 0.30932770151355155, "grad_norm": 2.0735220909118652, "learning_rate": 1.1326352484007363e-05, "loss": 0.7773, "step": 4394 }, { "epoch": 0.3093980992608237, "grad_norm": 1.9409379959106445, "learning_rate": 1.1325097611152004e-05, "loss": 0.6341, "step": 4395 }, { "epoch": 0.30946849700809576, "grad_norm": 1.7155520915985107, "learning_rate": 1.1323842513430906e-05, "loss": 0.7714, "step": 4396 }, { "epoch": 0.30953889475536783, "grad_norm": 1.7087410688400269, "learning_rate": 1.1322587190909317e-05, "loss": 0.6672, "step": 4397 }, { "epoch": 0.3096092925026399, "grad_norm": 1.7948307991027832, "learning_rate": 1.1321331643652506e-05, "loss": 0.6713, "step": 4398 }, { "epoch": 0.309679690249912, "grad_norm": 1.3273446559906006, "learning_rate": 1.132007587172575e-05, "loss": 0.5301, "step": 4399 }, { "epoch": 0.30975008799718406, "grad_norm": 2.8458659648895264, "learning_rate": 1.1318819875194337e-05, "loss": 0.7105, "step": 4400 }, { "epoch": 0.3098204857444562, "grad_norm": 2.3110504150390625, "learning_rate": 1.1317563654123566e-05, "loss": 0.7712, "step": 4401 }, { "epoch": 0.3098908834917283, "grad_norm": 1.9065675735473633, "learning_rate": 1.131630720857875e-05, "loss": 0.6745, "step": 4402 }, { "epoch": 0.30996128123900035, "grad_norm": 2.0457825660705566, "learning_rate": 1.1315050538625215e-05, "loss": 0.7622, "step": 4403 }, { "epoch": 0.31003167898627243, "grad_norm": 2.2997398376464844, "learning_rate": 1.1313793644328292e-05, "loss": 0.6855, "step": 4404 }, { "epoch": 0.3101020767335445, "grad_norm": 1.907590389251709, "learning_rate": 1.1312536525753334e-05, "loss": 0.7633, "step": 4405 }, { "epoch": 0.31017247448081664, "grad_norm": 1.7173782587051392, "learning_rate": 1.1311279182965697e-05, "loss": 0.7747, "step": 4406 }, { "epoch": 0.3102428722280887, "grad_norm": 1.7234442234039307, "learning_rate": 1.1310021616030746e-05, "loss": 0.7369, "step": 4407 }, { "epoch": 0.3103132699753608, "grad_norm": 2.2668983936309814, "learning_rate": 1.130876382501387e-05, "loss": 0.7413, "step": 4408 }, { "epoch": 0.31038366772263287, "grad_norm": 1.7635817527770996, "learning_rate": 1.1307505809980462e-05, "loss": 0.9007, "step": 4409 }, { "epoch": 0.31045406546990495, "grad_norm": 1.9425183534622192, "learning_rate": 1.1306247570995925e-05, "loss": 0.658, "step": 4410 }, { "epoch": 0.310524463217177, "grad_norm": 2.2386856079101562, "learning_rate": 1.1304989108125676e-05, "loss": 0.83, "step": 4411 }, { "epoch": 0.31059486096444916, "grad_norm": 2.3488993644714355, "learning_rate": 1.1303730421435143e-05, "loss": 0.7089, "step": 4412 }, { "epoch": 0.31066525871172124, "grad_norm": 1.9710348844528198, "learning_rate": 1.1302471510989772e-05, "loss": 0.7086, "step": 4413 }, { "epoch": 0.3107356564589933, "grad_norm": 1.8465609550476074, "learning_rate": 1.1301212376855006e-05, "loss": 0.6492, "step": 4414 }, { "epoch": 0.3108060542062654, "grad_norm": 2.5460364818573, "learning_rate": 1.1299953019096315e-05, "loss": 0.7003, "step": 4415 }, { "epoch": 0.31087645195353747, "grad_norm": 1.8237310647964478, "learning_rate": 1.1298693437779175e-05, "loss": 0.778, "step": 4416 }, { "epoch": 0.31094684970080955, "grad_norm": 2.1486828327178955, "learning_rate": 1.1297433632969066e-05, "loss": 0.6883, "step": 4417 }, { "epoch": 0.3110172474480817, "grad_norm": 2.267162322998047, "learning_rate": 1.1296173604731493e-05, "loss": 0.7708, "step": 4418 }, { "epoch": 0.31108764519535376, "grad_norm": 1.8029956817626953, "learning_rate": 1.129491335313196e-05, "loss": 0.788, "step": 4419 }, { "epoch": 0.31115804294262583, "grad_norm": 2.183175802230835, "learning_rate": 1.1293652878235996e-05, "loss": 0.6463, "step": 4420 }, { "epoch": 0.3112284406898979, "grad_norm": 1.9352731704711914, "learning_rate": 1.1292392180109129e-05, "loss": 0.765, "step": 4421 }, { "epoch": 0.31129883843717, "grad_norm": 1.8515058755874634, "learning_rate": 1.1291131258816905e-05, "loss": 0.7415, "step": 4422 }, { "epoch": 0.3113692361844421, "grad_norm": 2.0118114948272705, "learning_rate": 1.1289870114424881e-05, "loss": 0.8165, "step": 4423 }, { "epoch": 0.3114396339317142, "grad_norm": 2.0142111778259277, "learning_rate": 1.1288608746998623e-05, "loss": 0.7421, "step": 4424 }, { "epoch": 0.3115100316789863, "grad_norm": 1.890453815460205, "learning_rate": 1.1287347156603713e-05, "loss": 0.6554, "step": 4425 }, { "epoch": 0.31158042942625835, "grad_norm": 1.7478880882263184, "learning_rate": 1.1286085343305743e-05, "loss": 0.6375, "step": 4426 }, { "epoch": 0.31165082717353043, "grad_norm": 1.9007643461227417, "learning_rate": 1.1284823307170314e-05, "loss": 0.8103, "step": 4427 }, { "epoch": 0.3117212249208025, "grad_norm": 1.9803576469421387, "learning_rate": 1.1283561048263038e-05, "loss": 0.7024, "step": 4428 }, { "epoch": 0.31179162266807464, "grad_norm": 1.592486023902893, "learning_rate": 1.1282298566649546e-05, "loss": 0.7462, "step": 4429 }, { "epoch": 0.3118620204153467, "grad_norm": 1.5957720279693604, "learning_rate": 1.1281035862395472e-05, "loss": 0.7653, "step": 4430 }, { "epoch": 0.3119324181626188, "grad_norm": 1.9658104181289673, "learning_rate": 1.1279772935566467e-05, "loss": 0.7372, "step": 4431 }, { "epoch": 0.3120028159098909, "grad_norm": 1.8854186534881592, "learning_rate": 1.1278509786228191e-05, "loss": 0.656, "step": 4432 }, { "epoch": 0.31207321365716295, "grad_norm": 2.2209527492523193, "learning_rate": 1.1277246414446318e-05, "loss": 0.7337, "step": 4433 }, { "epoch": 0.3121436114044351, "grad_norm": 1.8064396381378174, "learning_rate": 1.1275982820286528e-05, "loss": 0.61, "step": 4434 }, { "epoch": 0.31221400915170716, "grad_norm": 1.6920034885406494, "learning_rate": 1.127471900381452e-05, "loss": 0.6788, "step": 4435 }, { "epoch": 0.31228440689897924, "grad_norm": 1.7305679321289062, "learning_rate": 1.1273454965096e-05, "loss": 0.623, "step": 4436 }, { "epoch": 0.3123548046462513, "grad_norm": 2.102536678314209, "learning_rate": 1.1272190704196685e-05, "loss": 0.6294, "step": 4437 }, { "epoch": 0.3124252023935234, "grad_norm": 2.822849988937378, "learning_rate": 1.1270926221182305e-05, "loss": 0.7622, "step": 4438 }, { "epoch": 0.31249560014079547, "grad_norm": 1.9169998168945312, "learning_rate": 1.1269661516118606e-05, "loss": 0.7299, "step": 4439 }, { "epoch": 0.3125659978880676, "grad_norm": 2.0618340969085693, "learning_rate": 1.1268396589071334e-05, "loss": 0.7098, "step": 4440 }, { "epoch": 0.3126363956353397, "grad_norm": 1.7647374868392944, "learning_rate": 1.126713144010626e-05, "loss": 0.6972, "step": 4441 }, { "epoch": 0.31270679338261176, "grad_norm": 1.5347208976745605, "learning_rate": 1.1265866069289159e-05, "loss": 0.7326, "step": 4442 }, { "epoch": 0.31277719112988384, "grad_norm": 1.613402247428894, "learning_rate": 1.1264600476685815e-05, "loss": 0.6859, "step": 4443 }, { "epoch": 0.3128475888771559, "grad_norm": 2.1900596618652344, "learning_rate": 1.126333466236203e-05, "loss": 0.732, "step": 4444 }, { "epoch": 0.312917986624428, "grad_norm": 1.8385682106018066, "learning_rate": 1.1262068626383617e-05, "loss": 0.7533, "step": 4445 }, { "epoch": 0.3129883843717001, "grad_norm": 1.8919141292572021, "learning_rate": 1.1260802368816397e-05, "loss": 0.7224, "step": 4446 }, { "epoch": 0.3130587821189722, "grad_norm": 1.9405122995376587, "learning_rate": 1.1259535889726198e-05, "loss": 0.7485, "step": 4447 }, { "epoch": 0.3131291798662443, "grad_norm": 1.9961355924606323, "learning_rate": 1.1258269189178875e-05, "loss": 0.7827, "step": 4448 }, { "epoch": 0.31319957761351636, "grad_norm": 2.0449230670928955, "learning_rate": 1.1257002267240279e-05, "loss": 0.8264, "step": 4449 }, { "epoch": 0.31326997536078843, "grad_norm": 1.664318561553955, "learning_rate": 1.125573512397628e-05, "loss": 0.6704, "step": 4450 }, { "epoch": 0.31334037310806057, "grad_norm": 1.7518949508666992, "learning_rate": 1.1254467759452756e-05, "loss": 0.7241, "step": 4451 }, { "epoch": 0.31341077085533264, "grad_norm": 2.240057945251465, "learning_rate": 1.1253200173735603e-05, "loss": 0.7087, "step": 4452 }, { "epoch": 0.3134811686026047, "grad_norm": 1.7657912969589233, "learning_rate": 1.125193236689072e-05, "loss": 0.669, "step": 4453 }, { "epoch": 0.3135515663498768, "grad_norm": 1.9772002696990967, "learning_rate": 1.1250664338984022e-05, "loss": 0.6825, "step": 4454 }, { "epoch": 0.3136219640971489, "grad_norm": 2.0371551513671875, "learning_rate": 1.1249396090081436e-05, "loss": 0.7, "step": 4455 }, { "epoch": 0.31369236184442095, "grad_norm": 1.8751482963562012, "learning_rate": 1.1248127620248897e-05, "loss": 0.8017, "step": 4456 }, { "epoch": 0.3137627595916931, "grad_norm": 1.9645402431488037, "learning_rate": 1.1246858929552356e-05, "loss": 0.7269, "step": 4457 }, { "epoch": 0.31383315733896516, "grad_norm": 1.744320273399353, "learning_rate": 1.1245590018057776e-05, "loss": 0.8015, "step": 4458 }, { "epoch": 0.31390355508623724, "grad_norm": 1.790252923965454, "learning_rate": 1.1244320885831122e-05, "loss": 0.6426, "step": 4459 }, { "epoch": 0.3139739528335093, "grad_norm": 1.7894060611724854, "learning_rate": 1.1243051532938384e-05, "loss": 0.7504, "step": 4460 }, { "epoch": 0.3140443505807814, "grad_norm": 1.9329396486282349, "learning_rate": 1.1241781959445552e-05, "loss": 0.7705, "step": 4461 }, { "epoch": 0.31411474832805353, "grad_norm": 1.9899910688400269, "learning_rate": 1.1240512165418636e-05, "loss": 0.6508, "step": 4462 }, { "epoch": 0.3141851460753256, "grad_norm": 1.7938060760498047, "learning_rate": 1.1239242150923652e-05, "loss": 0.6864, "step": 4463 }, { "epoch": 0.3142555438225977, "grad_norm": 1.7188503742218018, "learning_rate": 1.1237971916026629e-05, "loss": 0.7078, "step": 4464 }, { "epoch": 0.31432594156986976, "grad_norm": 1.523958683013916, "learning_rate": 1.1236701460793607e-05, "loss": 0.5815, "step": 4465 }, { "epoch": 0.31439633931714184, "grad_norm": 2.1751294136047363, "learning_rate": 1.1235430785290642e-05, "loss": 0.6879, "step": 4466 }, { "epoch": 0.3144667370644139, "grad_norm": 1.8777403831481934, "learning_rate": 1.1234159889583792e-05, "loss": 0.7959, "step": 4467 }, { "epoch": 0.31453713481168605, "grad_norm": 1.8164162635803223, "learning_rate": 1.1232888773739135e-05, "loss": 0.6609, "step": 4468 }, { "epoch": 0.3146075325589581, "grad_norm": 2.0099027156829834, "learning_rate": 1.1231617437822758e-05, "loss": 0.7777, "step": 4469 }, { "epoch": 0.3146779303062302, "grad_norm": 1.7434080839157104, "learning_rate": 1.1230345881900757e-05, "loss": 0.7515, "step": 4470 }, { "epoch": 0.3147483280535023, "grad_norm": 2.0189883708953857, "learning_rate": 1.1229074106039243e-05, "loss": 0.6139, "step": 4471 }, { "epoch": 0.31481872580077436, "grad_norm": 1.981954574584961, "learning_rate": 1.1227802110304335e-05, "loss": 0.6499, "step": 4472 }, { "epoch": 0.3148891235480465, "grad_norm": 1.9777555465698242, "learning_rate": 1.1226529894762167e-05, "loss": 0.8065, "step": 4473 }, { "epoch": 0.31495952129531857, "grad_norm": 2.1682512760162354, "learning_rate": 1.1225257459478883e-05, "loss": 0.7893, "step": 4474 }, { "epoch": 0.31502991904259064, "grad_norm": 2.316328763961792, "learning_rate": 1.1223984804520636e-05, "loss": 0.7506, "step": 4475 }, { "epoch": 0.3151003167898627, "grad_norm": 1.828417420387268, "learning_rate": 1.1222711929953594e-05, "loss": 0.7998, "step": 4476 }, { "epoch": 0.3151707145371348, "grad_norm": 1.7657605409622192, "learning_rate": 1.1221438835843933e-05, "loss": 0.7497, "step": 4477 }, { "epoch": 0.3152411122844069, "grad_norm": 1.7982622385025024, "learning_rate": 1.1220165522257846e-05, "loss": 0.6379, "step": 4478 }, { "epoch": 0.315311510031679, "grad_norm": 1.64960515499115, "learning_rate": 1.121889198926153e-05, "loss": 0.7036, "step": 4479 }, { "epoch": 0.3153819077789511, "grad_norm": 1.7817151546478271, "learning_rate": 1.12176182369212e-05, "loss": 0.6942, "step": 4480 }, { "epoch": 0.31545230552622316, "grad_norm": 2.097374439239502, "learning_rate": 1.1216344265303077e-05, "loss": 0.8156, "step": 4481 }, { "epoch": 0.31552270327349524, "grad_norm": 1.506516933441162, "learning_rate": 1.1215070074473397e-05, "loss": 0.7292, "step": 4482 }, { "epoch": 0.3155931010207673, "grad_norm": 1.7547513246536255, "learning_rate": 1.1213795664498407e-05, "loss": 0.7284, "step": 4483 }, { "epoch": 0.3156634987680394, "grad_norm": 1.8306903839111328, "learning_rate": 1.1212521035444364e-05, "loss": 0.6504, "step": 4484 }, { "epoch": 0.31573389651531153, "grad_norm": 1.9615957736968994, "learning_rate": 1.1211246187377538e-05, "loss": 0.6272, "step": 4485 }, { "epoch": 0.3158042942625836, "grad_norm": 1.7946009635925293, "learning_rate": 1.1209971120364209e-05, "loss": 0.7097, "step": 4486 }, { "epoch": 0.3158746920098557, "grad_norm": 1.7886618375778198, "learning_rate": 1.1208695834470667e-05, "loss": 0.7, "step": 4487 }, { "epoch": 0.31594508975712776, "grad_norm": 1.8595759868621826, "learning_rate": 1.1207420329763216e-05, "loss": 0.6764, "step": 4488 }, { "epoch": 0.31601548750439984, "grad_norm": 1.763343334197998, "learning_rate": 1.1206144606308176e-05, "loss": 0.7526, "step": 4489 }, { "epoch": 0.31608588525167197, "grad_norm": 1.8307584524154663, "learning_rate": 1.1204868664171864e-05, "loss": 0.7388, "step": 4490 }, { "epoch": 0.31615628299894405, "grad_norm": 1.9727236032485962, "learning_rate": 1.1203592503420626e-05, "loss": 0.7091, "step": 4491 }, { "epoch": 0.3162266807462161, "grad_norm": 2.02842378616333, "learning_rate": 1.1202316124120803e-05, "loss": 0.8613, "step": 4492 }, { "epoch": 0.3162970784934882, "grad_norm": 1.482226014137268, "learning_rate": 1.1201039526338762e-05, "loss": 0.6123, "step": 4493 }, { "epoch": 0.3163674762407603, "grad_norm": 2.0035338401794434, "learning_rate": 1.1199762710140868e-05, "loss": 0.7752, "step": 4494 }, { "epoch": 0.31643787398803236, "grad_norm": 1.8785594701766968, "learning_rate": 1.1198485675593511e-05, "loss": 0.8222, "step": 4495 }, { "epoch": 0.3165082717353045, "grad_norm": 1.8970451354980469, "learning_rate": 1.1197208422763079e-05, "loss": 0.7993, "step": 4496 }, { "epoch": 0.31657866948257657, "grad_norm": 1.860840082168579, "learning_rate": 1.119593095171598e-05, "loss": 0.6707, "step": 4497 }, { "epoch": 0.31664906722984865, "grad_norm": 1.8211252689361572, "learning_rate": 1.1194653262518632e-05, "loss": 0.7303, "step": 4498 }, { "epoch": 0.3167194649771207, "grad_norm": 1.6276040077209473, "learning_rate": 1.1193375355237461e-05, "loss": 0.6663, "step": 4499 }, { "epoch": 0.3167898627243928, "grad_norm": 1.928048849105835, "learning_rate": 1.1192097229938907e-05, "loss": 0.8593, "step": 4500 }, { "epoch": 0.31686026047166493, "grad_norm": 1.8128437995910645, "learning_rate": 1.1190818886689423e-05, "loss": 0.7887, "step": 4501 }, { "epoch": 0.316930658218937, "grad_norm": 1.8515598773956299, "learning_rate": 1.1189540325555466e-05, "loss": 0.671, "step": 4502 }, { "epoch": 0.3170010559662091, "grad_norm": 1.922252893447876, "learning_rate": 1.1188261546603518e-05, "loss": 0.7222, "step": 4503 }, { "epoch": 0.31707145371348117, "grad_norm": 1.933125615119934, "learning_rate": 1.1186982549900056e-05, "loss": 0.6727, "step": 4504 }, { "epoch": 0.31714185146075324, "grad_norm": 1.9465702772140503, "learning_rate": 1.118570333551158e-05, "loss": 0.7115, "step": 4505 }, { "epoch": 0.3172122492080253, "grad_norm": 1.9398869276046753, "learning_rate": 1.1184423903504599e-05, "loss": 0.8072, "step": 4506 }, { "epoch": 0.31728264695529745, "grad_norm": 1.8738842010498047, "learning_rate": 1.1183144253945626e-05, "loss": 0.7339, "step": 4507 }, { "epoch": 0.31735304470256953, "grad_norm": 2.0439021587371826, "learning_rate": 1.1181864386901198e-05, "loss": 0.7738, "step": 4508 }, { "epoch": 0.3174234424498416, "grad_norm": 1.9848687648773193, "learning_rate": 1.118058430243785e-05, "loss": 0.7636, "step": 4509 }, { "epoch": 0.3174938401971137, "grad_norm": 2.0234215259552, "learning_rate": 1.117930400062214e-05, "loss": 0.7543, "step": 4510 }, { "epoch": 0.31756423794438576, "grad_norm": 1.9393932819366455, "learning_rate": 1.1178023481520629e-05, "loss": 0.7375, "step": 4511 }, { "epoch": 0.31763463569165784, "grad_norm": 1.8264366388320923, "learning_rate": 1.1176742745199894e-05, "loss": 0.6981, "step": 4512 }, { "epoch": 0.31770503343893, "grad_norm": 1.6543028354644775, "learning_rate": 1.1175461791726522e-05, "loss": 0.5147, "step": 4513 }, { "epoch": 0.31777543118620205, "grad_norm": 1.7566689252853394, "learning_rate": 1.1174180621167108e-05, "loss": 0.6977, "step": 4514 }, { "epoch": 0.31784582893347413, "grad_norm": 1.8014500141143799, "learning_rate": 1.1172899233588265e-05, "loss": 0.7445, "step": 4515 }, { "epoch": 0.3179162266807462, "grad_norm": 1.6694457530975342, "learning_rate": 1.117161762905661e-05, "loss": 0.6696, "step": 4516 }, { "epoch": 0.3179866244280183, "grad_norm": 1.9723091125488281, "learning_rate": 1.1170335807638778e-05, "loss": 0.8354, "step": 4517 }, { "epoch": 0.3180570221752904, "grad_norm": 1.5885682106018066, "learning_rate": 1.116905376940141e-05, "loss": 0.7075, "step": 4518 }, { "epoch": 0.3181274199225625, "grad_norm": 1.662989616394043, "learning_rate": 1.116777151441116e-05, "loss": 0.7986, "step": 4519 }, { "epoch": 0.31819781766983457, "grad_norm": 1.7282116413116455, "learning_rate": 1.1166489042734693e-05, "loss": 0.8845, "step": 4520 }, { "epoch": 0.31826821541710665, "grad_norm": 1.8073766231536865, "learning_rate": 1.116520635443869e-05, "loss": 0.928, "step": 4521 }, { "epoch": 0.3183386131643787, "grad_norm": 1.7906173467636108, "learning_rate": 1.1163923449589835e-05, "loss": 0.749, "step": 4522 }, { "epoch": 0.3184090109116508, "grad_norm": 2.1759002208709717, "learning_rate": 1.116264032825483e-05, "loss": 0.8061, "step": 4523 }, { "epoch": 0.31847940865892294, "grad_norm": 1.9174907207489014, "learning_rate": 1.1161356990500383e-05, "loss": 0.8493, "step": 4524 }, { "epoch": 0.318549806406195, "grad_norm": 2.140247344970703, "learning_rate": 1.1160073436393219e-05, "loss": 0.7096, "step": 4525 }, { "epoch": 0.3186202041534671, "grad_norm": 1.946632742881775, "learning_rate": 1.1158789666000067e-05, "loss": 0.6509, "step": 4526 }, { "epoch": 0.31869060190073917, "grad_norm": 1.7702847719192505, "learning_rate": 1.1157505679387676e-05, "loss": 0.6857, "step": 4527 }, { "epoch": 0.31876099964801125, "grad_norm": 3.1344213485717773, "learning_rate": 1.1156221476622797e-05, "loss": 0.7142, "step": 4528 }, { "epoch": 0.3188313973952834, "grad_norm": 1.7905479669570923, "learning_rate": 1.1154937057772202e-05, "loss": 0.7129, "step": 4529 }, { "epoch": 0.31890179514255546, "grad_norm": 1.8716684579849243, "learning_rate": 1.1153652422902666e-05, "loss": 0.6955, "step": 4530 }, { "epoch": 0.31897219288982753, "grad_norm": 2.758645534515381, "learning_rate": 1.1152367572080979e-05, "loss": 0.7816, "step": 4531 }, { "epoch": 0.3190425906370996, "grad_norm": 1.8886823654174805, "learning_rate": 1.1151082505373939e-05, "loss": 0.6938, "step": 4532 }, { "epoch": 0.3191129883843717, "grad_norm": 1.9985222816467285, "learning_rate": 1.1149797222848363e-05, "loss": 0.6005, "step": 4533 }, { "epoch": 0.31918338613164376, "grad_norm": 2.217329502105713, "learning_rate": 1.114851172457107e-05, "loss": 0.6462, "step": 4534 }, { "epoch": 0.3192537838789159, "grad_norm": 1.5407891273498535, "learning_rate": 1.1147226010608895e-05, "loss": 0.7123, "step": 4535 }, { "epoch": 0.319324181626188, "grad_norm": 1.9387365579605103, "learning_rate": 1.1145940081028683e-05, "loss": 0.7298, "step": 4536 }, { "epoch": 0.31939457937346005, "grad_norm": 1.7741397619247437, "learning_rate": 1.1144653935897293e-05, "loss": 0.5994, "step": 4537 }, { "epoch": 0.31946497712073213, "grad_norm": 2.4662833213806152, "learning_rate": 1.114336757528159e-05, "loss": 0.8573, "step": 4538 }, { "epoch": 0.3195353748680042, "grad_norm": 1.8365468978881836, "learning_rate": 1.1142080999248454e-05, "loss": 0.6287, "step": 4539 }, { "epoch": 0.3196057726152763, "grad_norm": 1.629912257194519, "learning_rate": 1.1140794207864777e-05, "loss": 0.7033, "step": 4540 }, { "epoch": 0.3196761703625484, "grad_norm": 1.7100404500961304, "learning_rate": 1.1139507201197459e-05, "loss": 0.781, "step": 4541 }, { "epoch": 0.3197465681098205, "grad_norm": 1.9585187435150146, "learning_rate": 1.1138219979313412e-05, "loss": 0.6616, "step": 4542 }, { "epoch": 0.31981696585709257, "grad_norm": 1.6772695779800415, "learning_rate": 1.1136932542279559e-05, "loss": 0.6891, "step": 4543 }, { "epoch": 0.31988736360436465, "grad_norm": 1.9039900302886963, "learning_rate": 1.113564489016284e-05, "loss": 0.7151, "step": 4544 }, { "epoch": 0.3199577613516367, "grad_norm": 2.159139633178711, "learning_rate": 1.1134357023030196e-05, "loss": 0.7002, "step": 4545 }, { "epoch": 0.32002815909890886, "grad_norm": 1.7786035537719727, "learning_rate": 1.1133068940948585e-05, "loss": 0.6595, "step": 4546 }, { "epoch": 0.32009855684618094, "grad_norm": 1.8771088123321533, "learning_rate": 1.1131780643984977e-05, "loss": 0.7539, "step": 4547 }, { "epoch": 0.320168954593453, "grad_norm": 1.5121489763259888, "learning_rate": 1.1130492132206352e-05, "loss": 0.6248, "step": 4548 }, { "epoch": 0.3202393523407251, "grad_norm": 2.047481060028076, "learning_rate": 1.11292034056797e-05, "loss": 0.728, "step": 4549 }, { "epoch": 0.32030975008799717, "grad_norm": 1.6514313220977783, "learning_rate": 1.1127914464472023e-05, "loss": 0.7265, "step": 4550 }, { "epoch": 0.32038014783526925, "grad_norm": 1.8785545825958252, "learning_rate": 1.1126625308650335e-05, "loss": 0.7066, "step": 4551 }, { "epoch": 0.3204505455825414, "grad_norm": 1.7513831853866577, "learning_rate": 1.112533593828166e-05, "loss": 0.7741, "step": 4552 }, { "epoch": 0.32052094332981346, "grad_norm": 2.0010428428649902, "learning_rate": 1.1124046353433035e-05, "loss": 0.6063, "step": 4553 }, { "epoch": 0.32059134107708553, "grad_norm": 1.9702069759368896, "learning_rate": 1.1122756554171502e-05, "loss": 0.7993, "step": 4554 }, { "epoch": 0.3206617388243576, "grad_norm": 1.8853092193603516, "learning_rate": 1.1121466540564123e-05, "loss": 0.6174, "step": 4555 }, { "epoch": 0.3207321365716297, "grad_norm": 1.942036747932434, "learning_rate": 1.1120176312677967e-05, "loss": 0.7992, "step": 4556 }, { "epoch": 0.3208025343189018, "grad_norm": 1.8211241960525513, "learning_rate": 1.1118885870580113e-05, "loss": 0.7139, "step": 4557 }, { "epoch": 0.3208729320661739, "grad_norm": 1.650458574295044, "learning_rate": 1.111759521433765e-05, "loss": 0.6779, "step": 4558 }, { "epoch": 0.320943329813446, "grad_norm": 1.901129961013794, "learning_rate": 1.1116304344017685e-05, "loss": 0.8199, "step": 4559 }, { "epoch": 0.32101372756071805, "grad_norm": 2.030168294906616, "learning_rate": 1.1115013259687329e-05, "loss": 0.6557, "step": 4560 }, { "epoch": 0.32108412530799013, "grad_norm": 1.7547634840011597, "learning_rate": 1.1113721961413707e-05, "loss": 0.6209, "step": 4561 }, { "epoch": 0.3211545230552622, "grad_norm": 1.9571545124053955, "learning_rate": 1.1112430449263954e-05, "loss": 0.7753, "step": 4562 }, { "epoch": 0.32122492080253434, "grad_norm": 2.167308807373047, "learning_rate": 1.1111138723305217e-05, "loss": 0.8753, "step": 4563 }, { "epoch": 0.3212953185498064, "grad_norm": 2.58717679977417, "learning_rate": 1.1109846783604654e-05, "loss": 0.7173, "step": 4564 }, { "epoch": 0.3213657162970785, "grad_norm": 3.0318500995635986, "learning_rate": 1.1108554630229436e-05, "loss": 0.6175, "step": 4565 }, { "epoch": 0.3214361140443506, "grad_norm": 1.869942307472229, "learning_rate": 1.1107262263246741e-05, "loss": 0.6895, "step": 4566 }, { "epoch": 0.32150651179162265, "grad_norm": 2.0008323192596436, "learning_rate": 1.1105969682723763e-05, "loss": 0.783, "step": 4567 }, { "epoch": 0.32157690953889473, "grad_norm": 1.5717226266860962, "learning_rate": 1.11046768887277e-05, "loss": 0.6616, "step": 4568 }, { "epoch": 0.32164730728616686, "grad_norm": 1.7672160863876343, "learning_rate": 1.110338388132577e-05, "loss": 0.8188, "step": 4569 }, { "epoch": 0.32171770503343894, "grad_norm": 1.9061336517333984, "learning_rate": 1.1102090660585196e-05, "loss": 0.7064, "step": 4570 }, { "epoch": 0.321788102780711, "grad_norm": 1.7830960750579834, "learning_rate": 1.1100797226573213e-05, "loss": 0.7017, "step": 4571 }, { "epoch": 0.3218585005279831, "grad_norm": 1.8475191593170166, "learning_rate": 1.1099503579357067e-05, "loss": 0.8142, "step": 4572 }, { "epoch": 0.32192889827525517, "grad_norm": 1.7821784019470215, "learning_rate": 1.1098209719004018e-05, "loss": 0.7295, "step": 4573 }, { "epoch": 0.3219992960225273, "grad_norm": 1.7113350629806519, "learning_rate": 1.1096915645581333e-05, "loss": 0.6207, "step": 4574 }, { "epoch": 0.3220696937697994, "grad_norm": 1.9233964681625366, "learning_rate": 1.1095621359156296e-05, "loss": 0.7547, "step": 4575 }, { "epoch": 0.32214009151707146, "grad_norm": 1.9837182760238647, "learning_rate": 1.1094326859796193e-05, "loss": 0.6882, "step": 4576 }, { "epoch": 0.32221048926434354, "grad_norm": 1.8245409727096558, "learning_rate": 1.109303214756833e-05, "loss": 0.7248, "step": 4577 }, { "epoch": 0.3222808870116156, "grad_norm": 2.453852891921997, "learning_rate": 1.1091737222540018e-05, "loss": 0.7288, "step": 4578 }, { "epoch": 0.3223512847588877, "grad_norm": 1.9179730415344238, "learning_rate": 1.1090442084778582e-05, "loss": 0.697, "step": 4579 }, { "epoch": 0.3224216825061598, "grad_norm": 1.748883605003357, "learning_rate": 1.1089146734351356e-05, "loss": 0.5814, "step": 4580 }, { "epoch": 0.3224920802534319, "grad_norm": 2.091643810272217, "learning_rate": 1.1087851171325692e-05, "loss": 0.5576, "step": 4581 }, { "epoch": 0.322562478000704, "grad_norm": 1.7303913831710815, "learning_rate": 1.1086555395768938e-05, "loss": 0.6461, "step": 4582 }, { "epoch": 0.32263287574797606, "grad_norm": 2.162034511566162, "learning_rate": 1.1085259407748472e-05, "loss": 0.8161, "step": 4583 }, { "epoch": 0.32270327349524813, "grad_norm": 1.9917607307434082, "learning_rate": 1.108396320733167e-05, "loss": 0.633, "step": 4584 }, { "epoch": 0.32277367124252027, "grad_norm": 1.821867823600769, "learning_rate": 1.1082666794585921e-05, "loss": 0.7088, "step": 4585 }, { "epoch": 0.32284406898979234, "grad_norm": 1.887601613998413, "learning_rate": 1.1081370169578629e-05, "loss": 0.6092, "step": 4586 }, { "epoch": 0.3229144667370644, "grad_norm": 1.7601211071014404, "learning_rate": 1.1080073332377203e-05, "loss": 0.7292, "step": 4587 }, { "epoch": 0.3229848644843365, "grad_norm": 1.98417329788208, "learning_rate": 1.107877628304907e-05, "loss": 0.7369, "step": 4588 }, { "epoch": 0.3230552622316086, "grad_norm": 1.8054105043411255, "learning_rate": 1.1077479021661664e-05, "loss": 0.7677, "step": 4589 }, { "epoch": 0.32312565997888065, "grad_norm": 1.7453300952911377, "learning_rate": 1.1076181548282432e-05, "loss": 0.6913, "step": 4590 }, { "epoch": 0.3231960577261528, "grad_norm": 1.6390944719314575, "learning_rate": 1.107488386297883e-05, "loss": 0.7593, "step": 4591 }, { "epoch": 0.32326645547342486, "grad_norm": 2.281402111053467, "learning_rate": 1.1073585965818324e-05, "loss": 0.7702, "step": 4592 }, { "epoch": 0.32333685322069694, "grad_norm": 1.8012315034866333, "learning_rate": 1.1072287856868396e-05, "loss": 0.7866, "step": 4593 }, { "epoch": 0.323407250967969, "grad_norm": 1.9631046056747437, "learning_rate": 1.1070989536196533e-05, "loss": 0.7146, "step": 4594 }, { "epoch": 0.3234776487152411, "grad_norm": 1.7890956401824951, "learning_rate": 1.1069691003870236e-05, "loss": 0.6994, "step": 4595 }, { "epoch": 0.3235480464625132, "grad_norm": 1.6904252767562866, "learning_rate": 1.106839225995702e-05, "loss": 0.7117, "step": 4596 }, { "epoch": 0.3236184442097853, "grad_norm": 1.975051760673523, "learning_rate": 1.1067093304524406e-05, "loss": 0.7867, "step": 4597 }, { "epoch": 0.3236888419570574, "grad_norm": 1.7070729732513428, "learning_rate": 1.1065794137639929e-05, "loss": 0.7419, "step": 4598 }, { "epoch": 0.32375923970432946, "grad_norm": 1.7105756998062134, "learning_rate": 1.1064494759371128e-05, "loss": 0.7156, "step": 4599 }, { "epoch": 0.32382963745160154, "grad_norm": 1.9721319675445557, "learning_rate": 1.1063195169785566e-05, "loss": 0.72, "step": 4600 }, { "epoch": 0.3239000351988736, "grad_norm": 2.0302064418792725, "learning_rate": 1.1061895368950809e-05, "loss": 0.7413, "step": 4601 }, { "epoch": 0.32397043294614575, "grad_norm": 2.002274513244629, "learning_rate": 1.106059535693443e-05, "loss": 0.754, "step": 4602 }, { "epoch": 0.3240408306934178, "grad_norm": 2.106182098388672, "learning_rate": 1.1059295133804022e-05, "loss": 0.7162, "step": 4603 }, { "epoch": 0.3241112284406899, "grad_norm": 1.8628323078155518, "learning_rate": 1.1057994699627185e-05, "loss": 0.7207, "step": 4604 }, { "epoch": 0.324181626187962, "grad_norm": 1.656383991241455, "learning_rate": 1.1056694054471527e-05, "loss": 0.6618, "step": 4605 }, { "epoch": 0.32425202393523406, "grad_norm": 3.647233247756958, "learning_rate": 1.1055393198404673e-05, "loss": 0.6319, "step": 4606 }, { "epoch": 0.32432242168250613, "grad_norm": 2.2536778450012207, "learning_rate": 1.1054092131494251e-05, "loss": 0.6087, "step": 4607 }, { "epoch": 0.32439281942977827, "grad_norm": 1.7093312740325928, "learning_rate": 1.105279085380791e-05, "loss": 0.7468, "step": 4608 }, { "epoch": 0.32446321717705034, "grad_norm": 1.6143155097961426, "learning_rate": 1.10514893654133e-05, "loss": 0.7782, "step": 4609 }, { "epoch": 0.3245336149243224, "grad_norm": 1.9205514192581177, "learning_rate": 1.105018766637809e-05, "loss": 0.5773, "step": 4610 }, { "epoch": 0.3246040126715945, "grad_norm": 2.089189052581787, "learning_rate": 1.1048885756769956e-05, "loss": 0.7101, "step": 4611 }, { "epoch": 0.3246744104188666, "grad_norm": 1.739653468132019, "learning_rate": 1.1047583636656583e-05, "loss": 0.7111, "step": 4612 }, { "epoch": 0.3247448081661387, "grad_norm": 1.8078001737594604, "learning_rate": 1.1046281306105672e-05, "loss": 0.6738, "step": 4613 }, { "epoch": 0.3248152059134108, "grad_norm": 1.8395683765411377, "learning_rate": 1.104497876518493e-05, "loss": 0.6698, "step": 4614 }, { "epoch": 0.32488560366068286, "grad_norm": 1.9073609113693237, "learning_rate": 1.104367601396208e-05, "loss": 0.6531, "step": 4615 }, { "epoch": 0.32495600140795494, "grad_norm": 1.5625337362289429, "learning_rate": 1.104237305250485e-05, "loss": 0.6061, "step": 4616 }, { "epoch": 0.325026399155227, "grad_norm": 2.3765017986297607, "learning_rate": 1.1041069880880987e-05, "loss": 0.8023, "step": 4617 }, { "epoch": 0.3250967969024991, "grad_norm": 1.7506581544876099, "learning_rate": 1.1039766499158238e-05, "loss": 0.6371, "step": 4618 }, { "epoch": 0.32516719464977123, "grad_norm": 1.9522844552993774, "learning_rate": 1.1038462907404372e-05, "loss": 0.7115, "step": 4619 }, { "epoch": 0.3252375923970433, "grad_norm": 2.135820150375366, "learning_rate": 1.1037159105687162e-05, "loss": 0.7412, "step": 4620 }, { "epoch": 0.3253079901443154, "grad_norm": 1.8366615772247314, "learning_rate": 1.1035855094074394e-05, "loss": 0.8126, "step": 4621 }, { "epoch": 0.32537838789158746, "grad_norm": 1.8052557706832886, "learning_rate": 1.1034550872633863e-05, "loss": 0.736, "step": 4622 }, { "epoch": 0.32544878563885954, "grad_norm": 3.2700324058532715, "learning_rate": 1.103324644143338e-05, "loss": 0.6936, "step": 4623 }, { "epoch": 0.3255191833861316, "grad_norm": 1.7796331644058228, "learning_rate": 1.103194180054076e-05, "loss": 0.7045, "step": 4624 }, { "epoch": 0.32558958113340375, "grad_norm": 2.363058090209961, "learning_rate": 1.1030636950023835e-05, "loss": 0.6893, "step": 4625 }, { "epoch": 0.3256599788806758, "grad_norm": 2.303678035736084, "learning_rate": 1.1029331889950444e-05, "loss": 0.7803, "step": 4626 }, { "epoch": 0.3257303766279479, "grad_norm": 2.334019184112549, "learning_rate": 1.102802662038844e-05, "loss": 0.7398, "step": 4627 }, { "epoch": 0.32580077437522, "grad_norm": 2.066189765930176, "learning_rate": 1.1026721141405684e-05, "loss": 0.7208, "step": 4628 }, { "epoch": 0.32587117212249206, "grad_norm": 1.7049401998519897, "learning_rate": 1.1025415453070047e-05, "loss": 0.6645, "step": 4629 }, { "epoch": 0.3259415698697642, "grad_norm": 2.991091251373291, "learning_rate": 1.1024109555449417e-05, "loss": 0.8071, "step": 4630 }, { "epoch": 0.32601196761703627, "grad_norm": 2.0745015144348145, "learning_rate": 1.1022803448611687e-05, "loss": 0.7992, "step": 4631 }, { "epoch": 0.32608236536430835, "grad_norm": 1.8294928073883057, "learning_rate": 1.1021497132624763e-05, "loss": 0.6721, "step": 4632 }, { "epoch": 0.3261527631115804, "grad_norm": 2.14886212348938, "learning_rate": 1.1020190607556558e-05, "loss": 0.6396, "step": 4633 }, { "epoch": 0.3262231608588525, "grad_norm": 2.076068162918091, "learning_rate": 1.1018883873475004e-05, "loss": 0.728, "step": 4634 }, { "epoch": 0.3262935586061246, "grad_norm": 2.4488892555236816, "learning_rate": 1.1017576930448039e-05, "loss": 0.6635, "step": 4635 }, { "epoch": 0.3263639563533967, "grad_norm": 1.9414836168289185, "learning_rate": 1.1016269778543608e-05, "loss": 0.7165, "step": 4636 }, { "epoch": 0.3264343541006688, "grad_norm": 2.138953924179077, "learning_rate": 1.1014962417829677e-05, "loss": 0.6101, "step": 4637 }, { "epoch": 0.32650475184794087, "grad_norm": 3.3359997272491455, "learning_rate": 1.1013654848374212e-05, "loss": 0.635, "step": 4638 }, { "epoch": 0.32657514959521294, "grad_norm": 1.8301496505737305, "learning_rate": 1.1012347070245196e-05, "loss": 0.7697, "step": 4639 }, { "epoch": 0.326645547342485, "grad_norm": 4.553729057312012, "learning_rate": 1.1011039083510625e-05, "loss": 0.7247, "step": 4640 }, { "epoch": 0.32671594508975715, "grad_norm": 1.8439445495605469, "learning_rate": 1.1009730888238496e-05, "loss": 0.7486, "step": 4641 }, { "epoch": 0.32678634283702923, "grad_norm": 1.9864567518234253, "learning_rate": 1.1008422484496828e-05, "loss": 0.7061, "step": 4642 }, { "epoch": 0.3268567405843013, "grad_norm": 1.8083325624465942, "learning_rate": 1.1007113872353646e-05, "loss": 0.7466, "step": 4643 }, { "epoch": 0.3269271383315734, "grad_norm": 1.6427509784698486, "learning_rate": 1.1005805051876983e-05, "loss": 0.7971, "step": 4644 }, { "epoch": 0.32699753607884546, "grad_norm": 1.7813974618911743, "learning_rate": 1.1004496023134887e-05, "loss": 0.6812, "step": 4645 }, { "epoch": 0.32706793382611754, "grad_norm": 1.896641731262207, "learning_rate": 1.1003186786195419e-05, "loss": 0.7251, "step": 4646 }, { "epoch": 0.3271383315733897, "grad_norm": 1.6980366706848145, "learning_rate": 1.100187734112664e-05, "loss": 0.7754, "step": 4647 }, { "epoch": 0.32720872932066175, "grad_norm": 2.0339064598083496, "learning_rate": 1.1000567687996639e-05, "loss": 0.8209, "step": 4648 }, { "epoch": 0.32727912706793383, "grad_norm": 1.8670015335083008, "learning_rate": 1.0999257826873498e-05, "loss": 0.7814, "step": 4649 }, { "epoch": 0.3273495248152059, "grad_norm": 1.797736406326294, "learning_rate": 1.099794775782532e-05, "loss": 0.8017, "step": 4650 }, { "epoch": 0.327419922562478, "grad_norm": 2.3725647926330566, "learning_rate": 1.0996637480920219e-05, "loss": 0.7982, "step": 4651 }, { "epoch": 0.32749032030975006, "grad_norm": 2.1352100372314453, "learning_rate": 1.0995326996226314e-05, "loss": 0.7845, "step": 4652 }, { "epoch": 0.3275607180570222, "grad_norm": 1.9671199321746826, "learning_rate": 1.099401630381174e-05, "loss": 0.7134, "step": 4653 }, { "epoch": 0.32763111580429427, "grad_norm": 2.284864664077759, "learning_rate": 1.0992705403744644e-05, "loss": 0.6979, "step": 4654 }, { "epoch": 0.32770151355156635, "grad_norm": 1.9225486516952515, "learning_rate": 1.0991394296093175e-05, "loss": 0.621, "step": 4655 }, { "epoch": 0.3277719112988384, "grad_norm": 2.013397693634033, "learning_rate": 1.0990082980925503e-05, "loss": 0.7349, "step": 4656 }, { "epoch": 0.3278423090461105, "grad_norm": 8.000661849975586, "learning_rate": 1.0988771458309803e-05, "loss": 0.6884, "step": 4657 }, { "epoch": 0.32791270679338264, "grad_norm": 1.8687578439712524, "learning_rate": 1.0987459728314262e-05, "loss": 0.6876, "step": 4658 }, { "epoch": 0.3279831045406547, "grad_norm": 1.8512394428253174, "learning_rate": 1.0986147791007078e-05, "loss": 0.6985, "step": 4659 }, { "epoch": 0.3280535022879268, "grad_norm": 1.790837049484253, "learning_rate": 1.0984835646456463e-05, "loss": 0.734, "step": 4660 }, { "epoch": 0.32812390003519887, "grad_norm": 1.9225798845291138, "learning_rate": 1.0983523294730632e-05, "loss": 0.772, "step": 4661 }, { "epoch": 0.32819429778247095, "grad_norm": 2.0373215675354004, "learning_rate": 1.0982210735897817e-05, "loss": 0.7677, "step": 4662 }, { "epoch": 0.328264695529743, "grad_norm": 2.1895511150360107, "learning_rate": 1.098089797002626e-05, "loss": 0.8331, "step": 4663 }, { "epoch": 0.32833509327701516, "grad_norm": 2.0108344554901123, "learning_rate": 1.0979584997184211e-05, "loss": 0.8154, "step": 4664 }, { "epoch": 0.32840549102428723, "grad_norm": 2.508824586868286, "learning_rate": 1.0978271817439936e-05, "loss": 0.7742, "step": 4665 }, { "epoch": 0.3284758887715593, "grad_norm": 2.8454315662384033, "learning_rate": 1.0976958430861703e-05, "loss": 0.6922, "step": 4666 }, { "epoch": 0.3285462865188314, "grad_norm": 2.1691746711730957, "learning_rate": 1.0975644837517803e-05, "loss": 0.7377, "step": 4667 }, { "epoch": 0.32861668426610346, "grad_norm": 1.8969171047210693, "learning_rate": 1.0974331037476525e-05, "loss": 0.7452, "step": 4668 }, { "epoch": 0.3286870820133756, "grad_norm": 1.7713415622711182, "learning_rate": 1.0973017030806178e-05, "loss": 0.6608, "step": 4669 }, { "epoch": 0.3287574797606477, "grad_norm": 1.90436851978302, "learning_rate": 1.0971702817575077e-05, "loss": 0.7849, "step": 4670 }, { "epoch": 0.32882787750791975, "grad_norm": 2.22619891166687, "learning_rate": 1.097038839785155e-05, "loss": 0.768, "step": 4671 }, { "epoch": 0.32889827525519183, "grad_norm": 1.7953124046325684, "learning_rate": 1.0969073771703933e-05, "loss": 0.6407, "step": 4672 }, { "epoch": 0.3289686730024639, "grad_norm": 1.6793264150619507, "learning_rate": 1.0967758939200578e-05, "loss": 0.7896, "step": 4673 }, { "epoch": 0.329039070749736, "grad_norm": 2.3033206462860107, "learning_rate": 1.0966443900409841e-05, "loss": 0.7183, "step": 4674 }, { "epoch": 0.3291094684970081, "grad_norm": 1.9964032173156738, "learning_rate": 1.0965128655400094e-05, "loss": 0.6547, "step": 4675 }, { "epoch": 0.3291798662442802, "grad_norm": 1.9438152313232422, "learning_rate": 1.0963813204239717e-05, "loss": 0.7151, "step": 4676 }, { "epoch": 0.32925026399155227, "grad_norm": 1.685558557510376, "learning_rate": 1.0962497546997102e-05, "loss": 0.7081, "step": 4677 }, { "epoch": 0.32932066173882435, "grad_norm": 1.915696382522583, "learning_rate": 1.0961181683740649e-05, "loss": 0.6772, "step": 4678 }, { "epoch": 0.3293910594860964, "grad_norm": 1.9172163009643555, "learning_rate": 1.0959865614538776e-05, "loss": 0.7898, "step": 4679 }, { "epoch": 0.32946145723336856, "grad_norm": 1.8135061264038086, "learning_rate": 1.09585493394599e-05, "loss": 0.8221, "step": 4680 }, { "epoch": 0.32953185498064064, "grad_norm": 1.686639666557312, "learning_rate": 1.095723285857246e-05, "loss": 0.6394, "step": 4681 }, { "epoch": 0.3296022527279127, "grad_norm": 1.7097264528274536, "learning_rate": 1.0955916171944902e-05, "loss": 0.797, "step": 4682 }, { "epoch": 0.3296726504751848, "grad_norm": 1.8812217712402344, "learning_rate": 1.0954599279645679e-05, "loss": 0.6542, "step": 4683 }, { "epoch": 0.32974304822245687, "grad_norm": 1.7428539991378784, "learning_rate": 1.0953282181743255e-05, "loss": 0.6813, "step": 4684 }, { "epoch": 0.32981344596972895, "grad_norm": 1.6724555492401123, "learning_rate": 1.0951964878306113e-05, "loss": 0.6419, "step": 4685 }, { "epoch": 0.3298838437170011, "grad_norm": 1.4921215772628784, "learning_rate": 1.0950647369402737e-05, "loss": 0.6503, "step": 4686 }, { "epoch": 0.32995424146427316, "grad_norm": 1.7895342111587524, "learning_rate": 1.0949329655101624e-05, "loss": 0.7232, "step": 4687 }, { "epoch": 0.33002463921154523, "grad_norm": 1.8620413541793823, "learning_rate": 1.0948011735471289e-05, "loss": 0.7581, "step": 4688 }, { "epoch": 0.3300950369588173, "grad_norm": 2.2104923725128174, "learning_rate": 1.0946693610580246e-05, "loss": 0.6292, "step": 4689 }, { "epoch": 0.3301654347060894, "grad_norm": 1.8532063961029053, "learning_rate": 1.0945375280497031e-05, "loss": 0.7913, "step": 4690 }, { "epoch": 0.33023583245336147, "grad_norm": 2.110642910003662, "learning_rate": 1.094405674529018e-05, "loss": 0.657, "step": 4691 }, { "epoch": 0.3303062302006336, "grad_norm": 2.1547229290008545, "learning_rate": 1.0942738005028243e-05, "loss": 0.7583, "step": 4692 }, { "epoch": 0.3303766279479057, "grad_norm": 2.540841817855835, "learning_rate": 1.0941419059779793e-05, "loss": 0.7526, "step": 4693 }, { "epoch": 0.33044702569517775, "grad_norm": 1.8449803590774536, "learning_rate": 1.0940099909613393e-05, "loss": 0.685, "step": 4694 }, { "epoch": 0.33051742344244983, "grad_norm": 3.4339823722839355, "learning_rate": 1.093878055459763e-05, "loss": 0.6388, "step": 4695 }, { "epoch": 0.3305878211897219, "grad_norm": 1.8747515678405762, "learning_rate": 1.09374609948011e-05, "loss": 0.6391, "step": 4696 }, { "epoch": 0.33065821893699404, "grad_norm": 2.159512758255005, "learning_rate": 1.0936141230292408e-05, "loss": 0.642, "step": 4697 }, { "epoch": 0.3307286166842661, "grad_norm": 1.7504578828811646, "learning_rate": 1.0934821261140166e-05, "loss": 0.7659, "step": 4698 }, { "epoch": 0.3307990144315382, "grad_norm": 2.270479917526245, "learning_rate": 1.0933501087413005e-05, "loss": 0.7105, "step": 4699 }, { "epoch": 0.3308694121788103, "grad_norm": 2.078235626220703, "learning_rate": 1.0932180709179559e-05, "loss": 0.8259, "step": 4700 }, { "epoch": 0.33093980992608235, "grad_norm": 1.9788585901260376, "learning_rate": 1.0930860126508477e-05, "loss": 0.7448, "step": 4701 }, { "epoch": 0.33101020767335443, "grad_norm": 2.2282114028930664, "learning_rate": 1.0929539339468417e-05, "loss": 0.7237, "step": 4702 }, { "epoch": 0.33108060542062656, "grad_norm": 2.0466268062591553, "learning_rate": 1.0928218348128048e-05, "loss": 0.7003, "step": 4703 }, { "epoch": 0.33115100316789864, "grad_norm": 1.9221158027648926, "learning_rate": 1.092689715255605e-05, "loss": 0.6708, "step": 4704 }, { "epoch": 0.3312214009151707, "grad_norm": 1.8663239479064941, "learning_rate": 1.0925575752821114e-05, "loss": 0.7204, "step": 4705 }, { "epoch": 0.3312917986624428, "grad_norm": 1.7420791387557983, "learning_rate": 1.0924254148991937e-05, "loss": 0.7418, "step": 4706 }, { "epoch": 0.33136219640971487, "grad_norm": 1.9348806142807007, "learning_rate": 1.0922932341137232e-05, "loss": 0.7874, "step": 4707 }, { "epoch": 0.331432594156987, "grad_norm": 1.8459362983703613, "learning_rate": 1.0921610329325723e-05, "loss": 0.7885, "step": 4708 }, { "epoch": 0.3315029919042591, "grad_norm": 2.075474500656128, "learning_rate": 1.0920288113626143e-05, "loss": 0.6113, "step": 4709 }, { "epoch": 0.33157338965153116, "grad_norm": 2.03891658782959, "learning_rate": 1.0918965694107231e-05, "loss": 0.7423, "step": 4710 }, { "epoch": 0.33164378739880324, "grad_norm": 2.33455753326416, "learning_rate": 1.0917643070837744e-05, "loss": 0.6099, "step": 4711 }, { "epoch": 0.3317141851460753, "grad_norm": 1.6061279773712158, "learning_rate": 1.0916320243886446e-05, "loss": 0.7674, "step": 4712 }, { "epoch": 0.3317845828933474, "grad_norm": 1.7878636121749878, "learning_rate": 1.0914997213322114e-05, "loss": 0.7254, "step": 4713 }, { "epoch": 0.3318549806406195, "grad_norm": 1.9233156442642212, "learning_rate": 1.0913673979213528e-05, "loss": 0.6698, "step": 4714 }, { "epoch": 0.3319253783878916, "grad_norm": 2.1003808975219727, "learning_rate": 1.0912350541629488e-05, "loss": 0.6726, "step": 4715 }, { "epoch": 0.3319957761351637, "grad_norm": 1.7975786924362183, "learning_rate": 1.0911026900638802e-05, "loss": 0.7086, "step": 4716 }, { "epoch": 0.33206617388243576, "grad_norm": 1.9947775602340698, "learning_rate": 1.0909703056310283e-05, "loss": 0.6254, "step": 4717 }, { "epoch": 0.33213657162970783, "grad_norm": 1.5790218114852905, "learning_rate": 1.0908379008712764e-05, "loss": 0.7512, "step": 4718 }, { "epoch": 0.3322069693769799, "grad_norm": 1.6874760389328003, "learning_rate": 1.0907054757915076e-05, "loss": 0.7097, "step": 4719 }, { "epoch": 0.33227736712425204, "grad_norm": 2.1368863582611084, "learning_rate": 1.0905730303986078e-05, "loss": 0.7236, "step": 4720 }, { "epoch": 0.3323477648715241, "grad_norm": 1.934064507484436, "learning_rate": 1.0904405646994621e-05, "loss": 0.8579, "step": 4721 }, { "epoch": 0.3324181626187962, "grad_norm": 1.7855464220046997, "learning_rate": 1.0903080787009578e-05, "loss": 0.7868, "step": 4722 }, { "epoch": 0.3324885603660683, "grad_norm": 2.182596445083618, "learning_rate": 1.0901755724099833e-05, "loss": 0.7909, "step": 4723 }, { "epoch": 0.33255895811334035, "grad_norm": 2.315124988555908, "learning_rate": 1.0900430458334273e-05, "loss": 0.6257, "step": 4724 }, { "epoch": 0.3326293558606125, "grad_norm": 1.7875077724456787, "learning_rate": 1.0899104989781798e-05, "loss": 0.7563, "step": 4725 }, { "epoch": 0.33269975360788456, "grad_norm": 1.9466402530670166, "learning_rate": 1.0897779318511326e-05, "loss": 0.6127, "step": 4726 }, { "epoch": 0.33277015135515664, "grad_norm": 2.0292139053344727, "learning_rate": 1.0896453444591778e-05, "loss": 0.7127, "step": 4727 }, { "epoch": 0.3328405491024287, "grad_norm": 2.4154646396636963, "learning_rate": 1.0895127368092085e-05, "loss": 0.7138, "step": 4728 }, { "epoch": 0.3329109468497008, "grad_norm": 2.108973264694214, "learning_rate": 1.0893801089081193e-05, "loss": 0.7377, "step": 4729 }, { "epoch": 0.3329813445969729, "grad_norm": 1.9350554943084717, "learning_rate": 1.0892474607628057e-05, "loss": 0.7852, "step": 4730 }, { "epoch": 0.333051742344245, "grad_norm": 2.325301170349121, "learning_rate": 1.089114792380164e-05, "loss": 0.7529, "step": 4731 }, { "epoch": 0.3331221400915171, "grad_norm": 1.9331377744674683, "learning_rate": 1.0889821037670919e-05, "loss": 0.6306, "step": 4732 }, { "epoch": 0.33319253783878916, "grad_norm": 2.040936231613159, "learning_rate": 1.0888493949304878e-05, "loss": 0.8321, "step": 4733 }, { "epoch": 0.33326293558606124, "grad_norm": 2.2150509357452393, "learning_rate": 1.0887166658772517e-05, "loss": 0.689, "step": 4734 }, { "epoch": 0.3333333333333333, "grad_norm": 1.70967698097229, "learning_rate": 1.0885839166142838e-05, "loss": 0.6162, "step": 4735 }, { "epoch": 0.33340373108060545, "grad_norm": 2.1619012355804443, "learning_rate": 1.0884511471484863e-05, "loss": 0.748, "step": 4736 }, { "epoch": 0.3334741288278775, "grad_norm": 1.8263301849365234, "learning_rate": 1.088318357486762e-05, "loss": 0.71, "step": 4737 }, { "epoch": 0.3335445265751496, "grad_norm": 1.6185098886489868, "learning_rate": 1.0881855476360145e-05, "loss": 0.68, "step": 4738 }, { "epoch": 0.3336149243224217, "grad_norm": 1.7600154876708984, "learning_rate": 1.0880527176031487e-05, "loss": 0.7454, "step": 4739 }, { "epoch": 0.33368532206969376, "grad_norm": 3.198735237121582, "learning_rate": 1.0879198673950711e-05, "loss": 0.62, "step": 4740 }, { "epoch": 0.33375571981696583, "grad_norm": 1.7257229089736938, "learning_rate": 1.0877869970186879e-05, "loss": 0.7026, "step": 4741 }, { "epoch": 0.33382611756423797, "grad_norm": 2.410123348236084, "learning_rate": 1.0876541064809076e-05, "loss": 0.735, "step": 4742 }, { "epoch": 0.33389651531151004, "grad_norm": 1.8945331573486328, "learning_rate": 1.0875211957886391e-05, "loss": 0.758, "step": 4743 }, { "epoch": 0.3339669130587821, "grad_norm": 1.8061851263046265, "learning_rate": 1.0873882649487928e-05, "loss": 0.6849, "step": 4744 }, { "epoch": 0.3340373108060542, "grad_norm": 2.185396194458008, "learning_rate": 1.0872553139682797e-05, "loss": 0.6682, "step": 4745 }, { "epoch": 0.3341077085533263, "grad_norm": 2.0824451446533203, "learning_rate": 1.087122342854012e-05, "loss": 0.6979, "step": 4746 }, { "epoch": 0.33417810630059835, "grad_norm": 1.8123105764389038, "learning_rate": 1.0869893516129035e-05, "loss": 0.7356, "step": 4747 }, { "epoch": 0.3342485040478705, "grad_norm": 1.967579960823059, "learning_rate": 1.086856340251868e-05, "loss": 0.6489, "step": 4748 }, { "epoch": 0.33431890179514256, "grad_norm": 1.9450899362564087, "learning_rate": 1.0867233087778207e-05, "loss": 0.8192, "step": 4749 }, { "epoch": 0.33438929954241464, "grad_norm": 2.0892515182495117, "learning_rate": 1.0865902571976786e-05, "loss": 0.7144, "step": 4750 }, { "epoch": 0.3344596972896867, "grad_norm": 1.9026561975479126, "learning_rate": 1.086457185518359e-05, "loss": 0.6678, "step": 4751 }, { "epoch": 0.3345300950369588, "grad_norm": 1.8951181173324585, "learning_rate": 1.08632409374678e-05, "loss": 0.6042, "step": 4752 }, { "epoch": 0.33460049278423093, "grad_norm": 1.743672251701355, "learning_rate": 1.0861909818898617e-05, "loss": 0.6739, "step": 4753 }, { "epoch": 0.334670890531503, "grad_norm": 2.1553544998168945, "learning_rate": 1.0860578499545245e-05, "loss": 0.6878, "step": 4754 }, { "epoch": 0.3347412882787751, "grad_norm": 2.097257614135742, "learning_rate": 1.08592469794769e-05, "loss": 0.7007, "step": 4755 }, { "epoch": 0.33481168602604716, "grad_norm": 2.411172389984131, "learning_rate": 1.085791525876281e-05, "loss": 0.7965, "step": 4756 }, { "epoch": 0.33488208377331924, "grad_norm": 2.012063503265381, "learning_rate": 1.0856583337472211e-05, "loss": 0.6854, "step": 4757 }, { "epoch": 0.3349524815205913, "grad_norm": 2.7740111351013184, "learning_rate": 1.0855251215674352e-05, "loss": 0.7171, "step": 4758 }, { "epoch": 0.33502287926786345, "grad_norm": 2.2028841972351074, "learning_rate": 1.085391889343849e-05, "loss": 0.7016, "step": 4759 }, { "epoch": 0.3350932770151355, "grad_norm": 2.190796375274658, "learning_rate": 1.0852586370833896e-05, "loss": 0.6874, "step": 4760 }, { "epoch": 0.3351636747624076, "grad_norm": 1.761626958847046, "learning_rate": 1.0851253647929846e-05, "loss": 0.6215, "step": 4761 }, { "epoch": 0.3352340725096797, "grad_norm": 1.8184051513671875, "learning_rate": 1.0849920724795631e-05, "loss": 0.683, "step": 4762 }, { "epoch": 0.33530447025695176, "grad_norm": 2.036888360977173, "learning_rate": 1.0848587601500552e-05, "loss": 0.8042, "step": 4763 }, { "epoch": 0.3353748680042239, "grad_norm": 2.1575939655303955, "learning_rate": 1.0847254278113916e-05, "loss": 0.7046, "step": 4764 }, { "epoch": 0.33544526575149597, "grad_norm": 1.8281302452087402, "learning_rate": 1.0845920754705049e-05, "loss": 0.7098, "step": 4765 }, { "epoch": 0.33551566349876805, "grad_norm": 2.024590015411377, "learning_rate": 1.0844587031343277e-05, "loss": 0.7736, "step": 4766 }, { "epoch": 0.3355860612460401, "grad_norm": 1.764366865158081, "learning_rate": 1.0843253108097945e-05, "loss": 0.7286, "step": 4767 }, { "epoch": 0.3356564589933122, "grad_norm": 2.2416298389434814, "learning_rate": 1.08419189850384e-05, "loss": 0.8414, "step": 4768 }, { "epoch": 0.3357268567405843, "grad_norm": 2.8116981983184814, "learning_rate": 1.084058466223401e-05, "loss": 0.7174, "step": 4769 }, { "epoch": 0.3357972544878564, "grad_norm": 1.9066141843795776, "learning_rate": 1.0839250139754144e-05, "loss": 0.7527, "step": 4770 }, { "epoch": 0.3358676522351285, "grad_norm": 2.449547290802002, "learning_rate": 1.0837915417668187e-05, "loss": 0.7145, "step": 4771 }, { "epoch": 0.33593804998240057, "grad_norm": 1.8601627349853516, "learning_rate": 1.083658049604553e-05, "loss": 0.727, "step": 4772 }, { "epoch": 0.33600844772967264, "grad_norm": 2.84259295463562, "learning_rate": 1.0835245374955579e-05, "loss": 0.7895, "step": 4773 }, { "epoch": 0.3360788454769447, "grad_norm": 2.7232141494750977, "learning_rate": 1.083391005446775e-05, "loss": 0.6844, "step": 4774 }, { "epoch": 0.3361492432242168, "grad_norm": 1.764163613319397, "learning_rate": 1.0832574534651463e-05, "loss": 0.7348, "step": 4775 }, { "epoch": 0.33621964097148893, "grad_norm": 1.9001750946044922, "learning_rate": 1.0831238815576156e-05, "loss": 0.7329, "step": 4776 }, { "epoch": 0.336290038718761, "grad_norm": 2.014292001724243, "learning_rate": 1.0829902897311271e-05, "loss": 0.8083, "step": 4777 }, { "epoch": 0.3363604364660331, "grad_norm": 2.3185012340545654, "learning_rate": 1.0828566779926266e-05, "loss": 0.8117, "step": 4778 }, { "epoch": 0.33643083421330516, "grad_norm": 1.439197063446045, "learning_rate": 1.0827230463490609e-05, "loss": 0.6798, "step": 4779 }, { "epoch": 0.33650123196057724, "grad_norm": 1.7174010276794434, "learning_rate": 1.0825893948073773e-05, "loss": 0.7622, "step": 4780 }, { "epoch": 0.3365716297078494, "grad_norm": 1.7209681272506714, "learning_rate": 1.0824557233745246e-05, "loss": 0.7359, "step": 4781 }, { "epoch": 0.33664202745512145, "grad_norm": 1.896994948387146, "learning_rate": 1.0823220320574523e-05, "loss": 0.8063, "step": 4782 }, { "epoch": 0.33671242520239353, "grad_norm": 2.738091230392456, "learning_rate": 1.0821883208631116e-05, "loss": 0.7763, "step": 4783 }, { "epoch": 0.3367828229496656, "grad_norm": 1.7921277284622192, "learning_rate": 1.0820545897984538e-05, "loss": 0.6726, "step": 4784 }, { "epoch": 0.3368532206969377, "grad_norm": 1.919130563735962, "learning_rate": 1.0819208388704319e-05, "loss": 0.5687, "step": 4785 }, { "epoch": 0.33692361844420976, "grad_norm": 1.580159068107605, "learning_rate": 1.0817870680859996e-05, "loss": 0.6453, "step": 4786 }, { "epoch": 0.3369940161914819, "grad_norm": 2.0363929271698, "learning_rate": 1.081653277452112e-05, "loss": 0.7427, "step": 4787 }, { "epoch": 0.33706441393875397, "grad_norm": 1.9075959920883179, "learning_rate": 1.0815194669757249e-05, "loss": 0.6176, "step": 4788 }, { "epoch": 0.33713481168602605, "grad_norm": 2.177152395248413, "learning_rate": 1.0813856366637948e-05, "loss": 0.8394, "step": 4789 }, { "epoch": 0.3372052094332981, "grad_norm": 2.336824417114258, "learning_rate": 1.0812517865232804e-05, "loss": 0.6687, "step": 4790 }, { "epoch": 0.3372756071805702, "grad_norm": 1.6358850002288818, "learning_rate": 1.0811179165611404e-05, "loss": 0.8267, "step": 4791 }, { "epoch": 0.33734600492784234, "grad_norm": 2.1270229816436768, "learning_rate": 1.0809840267843347e-05, "loss": 0.8094, "step": 4792 }, { "epoch": 0.3374164026751144, "grad_norm": 2.1134791374206543, "learning_rate": 1.0808501171998242e-05, "loss": 0.7694, "step": 4793 }, { "epoch": 0.3374868004223865, "grad_norm": 1.8739615678787231, "learning_rate": 1.0807161878145713e-05, "loss": 0.7824, "step": 4794 }, { "epoch": 0.33755719816965857, "grad_norm": 1.4399677515029907, "learning_rate": 1.0805822386355391e-05, "loss": 0.5381, "step": 4795 }, { "epoch": 0.33762759591693065, "grad_norm": 2.081230401992798, "learning_rate": 1.0804482696696917e-05, "loss": 0.6851, "step": 4796 }, { "epoch": 0.3376979936642027, "grad_norm": 1.8418893814086914, "learning_rate": 1.080314280923994e-05, "loss": 0.8281, "step": 4797 }, { "epoch": 0.33776839141147486, "grad_norm": 2.004171133041382, "learning_rate": 1.0801802724054123e-05, "loss": 0.7638, "step": 4798 }, { "epoch": 0.33783878915874693, "grad_norm": 2.7696192264556885, "learning_rate": 1.0800462441209142e-05, "loss": 0.6976, "step": 4799 }, { "epoch": 0.337909186906019, "grad_norm": 1.6319090127944946, "learning_rate": 1.0799121960774675e-05, "loss": 0.6897, "step": 4800 }, { "epoch": 0.3379795846532911, "grad_norm": 2.0951921939849854, "learning_rate": 1.0797781282820418e-05, "loss": 0.7923, "step": 4801 }, { "epoch": 0.33804998240056316, "grad_norm": 1.6687337160110474, "learning_rate": 1.0796440407416073e-05, "loss": 0.652, "step": 4802 }, { "epoch": 0.33812038014783524, "grad_norm": 2.0823001861572266, "learning_rate": 1.079509933463135e-05, "loss": 0.6613, "step": 4803 }, { "epoch": 0.3381907778951074, "grad_norm": 1.8736237287521362, "learning_rate": 1.0793758064535979e-05, "loss": 0.7988, "step": 4804 }, { "epoch": 0.33826117564237945, "grad_norm": 1.9831979274749756, "learning_rate": 1.0792416597199689e-05, "loss": 0.7481, "step": 4805 }, { "epoch": 0.33833157338965153, "grad_norm": 2.1690144538879395, "learning_rate": 1.0791074932692225e-05, "loss": 0.5982, "step": 4806 }, { "epoch": 0.3384019711369236, "grad_norm": 2.390871047973633, "learning_rate": 1.0789733071083342e-05, "loss": 0.6691, "step": 4807 }, { "epoch": 0.3384723688841957, "grad_norm": 2.6683948040008545, "learning_rate": 1.0788391012442804e-05, "loss": 0.7155, "step": 4808 }, { "epoch": 0.3385427666314678, "grad_norm": 1.8321161270141602, "learning_rate": 1.0787048756840388e-05, "loss": 0.7028, "step": 4809 }, { "epoch": 0.3386131643787399, "grad_norm": 2.3082079887390137, "learning_rate": 1.0785706304345876e-05, "loss": 0.6757, "step": 4810 }, { "epoch": 0.33868356212601197, "grad_norm": 2.2892351150512695, "learning_rate": 1.0784363655029066e-05, "loss": 0.7213, "step": 4811 }, { "epoch": 0.33875395987328405, "grad_norm": 1.767261266708374, "learning_rate": 1.078302080895976e-05, "loss": 0.6609, "step": 4812 }, { "epoch": 0.3388243576205561, "grad_norm": 2.00166392326355, "learning_rate": 1.0781677766207777e-05, "loss": 0.7289, "step": 4813 }, { "epoch": 0.3388947553678282, "grad_norm": 1.9623041152954102, "learning_rate": 1.0780334526842944e-05, "loss": 0.7828, "step": 4814 }, { "epoch": 0.33896515311510034, "grad_norm": 2.165710687637329, "learning_rate": 1.0778991090935092e-05, "loss": 0.5972, "step": 4815 }, { "epoch": 0.3390355508623724, "grad_norm": 2.0018668174743652, "learning_rate": 1.0777647458554073e-05, "loss": 0.5969, "step": 4816 }, { "epoch": 0.3391059486096445, "grad_norm": 2.0146892070770264, "learning_rate": 1.077630362976974e-05, "loss": 0.7242, "step": 4817 }, { "epoch": 0.33917634635691657, "grad_norm": 1.777526617050171, "learning_rate": 1.0774959604651963e-05, "loss": 0.7127, "step": 4818 }, { "epoch": 0.33924674410418865, "grad_norm": 1.95378839969635, "learning_rate": 1.0773615383270615e-05, "loss": 0.7646, "step": 4819 }, { "epoch": 0.3393171418514608, "grad_norm": 1.7871085405349731, "learning_rate": 1.0772270965695587e-05, "loss": 0.7292, "step": 4820 }, { "epoch": 0.33938753959873286, "grad_norm": 2.1120598316192627, "learning_rate": 1.0770926351996777e-05, "loss": 0.7173, "step": 4821 }, { "epoch": 0.33945793734600493, "grad_norm": 2.3410491943359375, "learning_rate": 1.076958154224409e-05, "loss": 0.7755, "step": 4822 }, { "epoch": 0.339528335093277, "grad_norm": 2.0269722938537598, "learning_rate": 1.076823653650744e-05, "loss": 0.5835, "step": 4823 }, { "epoch": 0.3395987328405491, "grad_norm": 2.276371479034424, "learning_rate": 1.0766891334856767e-05, "loss": 0.7165, "step": 4824 }, { "epoch": 0.33966913058782117, "grad_norm": 1.974388599395752, "learning_rate": 1.0765545937361996e-05, "loss": 0.6987, "step": 4825 }, { "epoch": 0.3397395283350933, "grad_norm": 1.8961961269378662, "learning_rate": 1.0764200344093084e-05, "loss": 0.6673, "step": 4826 }, { "epoch": 0.3398099260823654, "grad_norm": 2.1117336750030518, "learning_rate": 1.0762854555119987e-05, "loss": 0.7833, "step": 4827 }, { "epoch": 0.33988032382963745, "grad_norm": 1.9031888246536255, "learning_rate": 1.0761508570512674e-05, "loss": 0.8093, "step": 4828 }, { "epoch": 0.33995072157690953, "grad_norm": 1.882541537284851, "learning_rate": 1.076016239034112e-05, "loss": 0.7219, "step": 4829 }, { "epoch": 0.3400211193241816, "grad_norm": 2.7898292541503906, "learning_rate": 1.0758816014675325e-05, "loss": 0.6264, "step": 4830 }, { "epoch": 0.3400915170714537, "grad_norm": 1.8471040725708008, "learning_rate": 1.0757469443585276e-05, "loss": 0.7071, "step": 4831 }, { "epoch": 0.3401619148187258, "grad_norm": 2.1000967025756836, "learning_rate": 1.075612267714099e-05, "loss": 0.7727, "step": 4832 }, { "epoch": 0.3402323125659979, "grad_norm": 1.8035727739334106, "learning_rate": 1.0754775715412483e-05, "loss": 0.6613, "step": 4833 }, { "epoch": 0.34030271031327, "grad_norm": 2.4467926025390625, "learning_rate": 1.0753428558469786e-05, "loss": 0.6499, "step": 4834 }, { "epoch": 0.34037310806054205, "grad_norm": 1.7585070133209229, "learning_rate": 1.075208120638294e-05, "loss": 0.7032, "step": 4835 }, { "epoch": 0.34044350580781413, "grad_norm": 1.776389479637146, "learning_rate": 1.0750733659221994e-05, "loss": 0.7284, "step": 4836 }, { "epoch": 0.34051390355508626, "grad_norm": 2.801448345184326, "learning_rate": 1.0749385917057008e-05, "loss": 0.7993, "step": 4837 }, { "epoch": 0.34058430130235834, "grad_norm": 1.9662574529647827, "learning_rate": 1.074803797995805e-05, "loss": 0.7151, "step": 4838 }, { "epoch": 0.3406546990496304, "grad_norm": 1.9715665578842163, "learning_rate": 1.0746689847995209e-05, "loss": 0.7449, "step": 4839 }, { "epoch": 0.3407250967969025, "grad_norm": 2.480294704437256, "learning_rate": 1.0745341521238564e-05, "loss": 0.7287, "step": 4840 }, { "epoch": 0.34079549454417457, "grad_norm": 1.5608187913894653, "learning_rate": 1.0743992999758223e-05, "loss": 0.7455, "step": 4841 }, { "epoch": 0.34086589229144665, "grad_norm": 2.1583945751190186, "learning_rate": 1.0742644283624296e-05, "loss": 0.7563, "step": 4842 }, { "epoch": 0.3409362900387188, "grad_norm": 1.9253653287887573, "learning_rate": 1.0741295372906902e-05, "loss": 0.6396, "step": 4843 }, { "epoch": 0.34100668778599086, "grad_norm": 1.9534454345703125, "learning_rate": 1.0739946267676173e-05, "loss": 0.685, "step": 4844 }, { "epoch": 0.34107708553326294, "grad_norm": 1.8217928409576416, "learning_rate": 1.0738596968002254e-05, "loss": 0.6119, "step": 4845 }, { "epoch": 0.341147483280535, "grad_norm": 2.2171595096588135, "learning_rate": 1.0737247473955288e-05, "loss": 0.7464, "step": 4846 }, { "epoch": 0.3412178810278071, "grad_norm": 1.6931865215301514, "learning_rate": 1.0735897785605442e-05, "loss": 0.8012, "step": 4847 }, { "epoch": 0.3412882787750792, "grad_norm": 1.7015212774276733, "learning_rate": 1.073454790302289e-05, "loss": 0.6851, "step": 4848 }, { "epoch": 0.3413586765223513, "grad_norm": 1.8041236400604248, "learning_rate": 1.0733197826277807e-05, "loss": 0.8114, "step": 4849 }, { "epoch": 0.3414290742696234, "grad_norm": 1.9889227151870728, "learning_rate": 1.073184755544039e-05, "loss": 0.6677, "step": 4850 }, { "epoch": 0.34149947201689546, "grad_norm": 1.9371503591537476, "learning_rate": 1.073049709058084e-05, "loss": 0.6381, "step": 4851 }, { "epoch": 0.34156986976416753, "grad_norm": 1.8300786018371582, "learning_rate": 1.0729146431769364e-05, "loss": 0.6669, "step": 4852 }, { "epoch": 0.3416402675114396, "grad_norm": 1.9158648252487183, "learning_rate": 1.0727795579076189e-05, "loss": 0.746, "step": 4853 }, { "epoch": 0.34171066525871174, "grad_norm": 1.555375576019287, "learning_rate": 1.0726444532571547e-05, "loss": 0.7297, "step": 4854 }, { "epoch": 0.3417810630059838, "grad_norm": 1.76763916015625, "learning_rate": 1.0725093292325677e-05, "loss": 0.631, "step": 4855 }, { "epoch": 0.3418514607532559, "grad_norm": 2.125959634780884, "learning_rate": 1.0723741858408834e-05, "loss": 0.7796, "step": 4856 }, { "epoch": 0.341921858500528, "grad_norm": 1.917842149734497, "learning_rate": 1.0722390230891281e-05, "loss": 0.6894, "step": 4857 }, { "epoch": 0.34199225624780005, "grad_norm": 1.7959773540496826, "learning_rate": 1.0721038409843287e-05, "loss": 0.6127, "step": 4858 }, { "epoch": 0.34206265399507213, "grad_norm": 2.2005560398101807, "learning_rate": 1.0719686395335134e-05, "loss": 0.7951, "step": 4859 }, { "epoch": 0.34213305174234426, "grad_norm": 1.6291238069534302, "learning_rate": 1.0718334187437122e-05, "loss": 0.678, "step": 4860 }, { "epoch": 0.34220344948961634, "grad_norm": 1.833293080329895, "learning_rate": 1.0716981786219543e-05, "loss": 0.6308, "step": 4861 }, { "epoch": 0.3422738472368884, "grad_norm": 1.7735939025878906, "learning_rate": 1.0715629191752718e-05, "loss": 0.6642, "step": 4862 }, { "epoch": 0.3423442449841605, "grad_norm": 1.8654438257217407, "learning_rate": 1.0714276404106964e-05, "loss": 0.6517, "step": 4863 }, { "epoch": 0.3424146427314326, "grad_norm": 1.7841194868087769, "learning_rate": 1.0712923423352617e-05, "loss": 0.7957, "step": 4864 }, { "epoch": 0.3424850404787047, "grad_norm": 1.9051513671875, "learning_rate": 1.0711570249560017e-05, "loss": 0.7348, "step": 4865 }, { "epoch": 0.3425554382259768, "grad_norm": 2.899679183959961, "learning_rate": 1.0710216882799522e-05, "loss": 0.6897, "step": 4866 }, { "epoch": 0.34262583597324886, "grad_norm": 2.1963939666748047, "learning_rate": 1.0708863323141488e-05, "loss": 0.6926, "step": 4867 }, { "epoch": 0.34269623372052094, "grad_norm": 1.6498035192489624, "learning_rate": 1.0707509570656291e-05, "loss": 0.7366, "step": 4868 }, { "epoch": 0.342766631467793, "grad_norm": 2.0913853645324707, "learning_rate": 1.0706155625414315e-05, "loss": 0.6354, "step": 4869 }, { "epoch": 0.3428370292150651, "grad_norm": 2.3661251068115234, "learning_rate": 1.070480148748595e-05, "loss": 0.831, "step": 4870 }, { "epoch": 0.3429074269623372, "grad_norm": 1.6716489791870117, "learning_rate": 1.0703447156941602e-05, "loss": 0.7617, "step": 4871 }, { "epoch": 0.3429778247096093, "grad_norm": 2.1577699184417725, "learning_rate": 1.0702092633851683e-05, "loss": 0.8423, "step": 4872 }, { "epoch": 0.3430482224568814, "grad_norm": 2.0851173400878906, "learning_rate": 1.0700737918286613e-05, "loss": 0.7607, "step": 4873 }, { "epoch": 0.34311862020415346, "grad_norm": 1.9092919826507568, "learning_rate": 1.0699383010316827e-05, "loss": 0.6722, "step": 4874 }, { "epoch": 0.34318901795142553, "grad_norm": 1.9479928016662598, "learning_rate": 1.0698027910012768e-05, "loss": 0.8096, "step": 4875 }, { "epoch": 0.34325941569869767, "grad_norm": 2.2419822216033936, "learning_rate": 1.0696672617444891e-05, "loss": 0.6854, "step": 4876 }, { "epoch": 0.34332981344596974, "grad_norm": 1.786512851715088, "learning_rate": 1.0695317132683653e-05, "loss": 0.7664, "step": 4877 }, { "epoch": 0.3434002111932418, "grad_norm": 1.9368327856063843, "learning_rate": 1.0693961455799533e-05, "loss": 0.6852, "step": 4878 }, { "epoch": 0.3434706089405139, "grad_norm": 1.7960407733917236, "learning_rate": 1.0692605586863012e-05, "loss": 0.6107, "step": 4879 }, { "epoch": 0.343541006687786, "grad_norm": 2.271759510040283, "learning_rate": 1.0691249525944583e-05, "loss": 0.578, "step": 4880 }, { "epoch": 0.34361140443505805, "grad_norm": 1.707755446434021, "learning_rate": 1.0689893273114745e-05, "loss": 0.7418, "step": 4881 }, { "epoch": 0.3436818021823302, "grad_norm": 2.61665940284729, "learning_rate": 1.0688536828444016e-05, "loss": 0.6586, "step": 4882 }, { "epoch": 0.34375219992960226, "grad_norm": 1.9910961389541626, "learning_rate": 1.0687180192002915e-05, "loss": 0.7349, "step": 4883 }, { "epoch": 0.34382259767687434, "grad_norm": 1.9239166975021362, "learning_rate": 1.068582336386198e-05, "loss": 0.6737, "step": 4884 }, { "epoch": 0.3438929954241464, "grad_norm": 1.738805890083313, "learning_rate": 1.0684466344091747e-05, "loss": 0.6565, "step": 4885 }, { "epoch": 0.3439633931714185, "grad_norm": 1.8537685871124268, "learning_rate": 1.0683109132762774e-05, "loss": 0.6585, "step": 4886 }, { "epoch": 0.34403379091869063, "grad_norm": 1.688008427619934, "learning_rate": 1.0681751729945622e-05, "loss": 0.6946, "step": 4887 }, { "epoch": 0.3441041886659627, "grad_norm": 1.916975498199463, "learning_rate": 1.068039413571086e-05, "loss": 0.7278, "step": 4888 }, { "epoch": 0.3441745864132348, "grad_norm": 2.190035820007324, "learning_rate": 1.0679036350129076e-05, "loss": 0.7539, "step": 4889 }, { "epoch": 0.34424498416050686, "grad_norm": 2.2998874187469482, "learning_rate": 1.067767837327086e-05, "loss": 0.7841, "step": 4890 }, { "epoch": 0.34431538190777894, "grad_norm": 2.3517253398895264, "learning_rate": 1.0676320205206814e-05, "loss": 0.7453, "step": 4891 }, { "epoch": 0.344385779655051, "grad_norm": 1.7720485925674438, "learning_rate": 1.0674961846007551e-05, "loss": 0.6817, "step": 4892 }, { "epoch": 0.34445617740232315, "grad_norm": 2.3588690757751465, "learning_rate": 1.0673603295743695e-05, "loss": 0.652, "step": 4893 }, { "epoch": 0.3445265751495952, "grad_norm": 2.088320732116699, "learning_rate": 1.0672244554485876e-05, "loss": 0.6081, "step": 4894 }, { "epoch": 0.3445969728968673, "grad_norm": 1.9637843370437622, "learning_rate": 1.0670885622304736e-05, "loss": 0.8192, "step": 4895 }, { "epoch": 0.3446673706441394, "grad_norm": 2.035407781600952, "learning_rate": 1.066952649927093e-05, "loss": 0.662, "step": 4896 }, { "epoch": 0.34473776839141146, "grad_norm": 1.8977701663970947, "learning_rate": 1.0668167185455115e-05, "loss": 0.7638, "step": 4897 }, { "epoch": 0.34480816613868354, "grad_norm": 1.8363027572631836, "learning_rate": 1.0666807680927967e-05, "loss": 0.7164, "step": 4898 }, { "epoch": 0.34487856388595567, "grad_norm": 1.9792706966400146, "learning_rate": 1.0665447985760167e-05, "loss": 0.7379, "step": 4899 }, { "epoch": 0.34494896163322775, "grad_norm": 1.6596431732177734, "learning_rate": 1.0664088100022406e-05, "loss": 0.6946, "step": 4900 }, { "epoch": 0.3450193593804998, "grad_norm": 1.683342456817627, "learning_rate": 1.0662728023785387e-05, "loss": 0.7628, "step": 4901 }, { "epoch": 0.3450897571277719, "grad_norm": 1.6519110202789307, "learning_rate": 1.0661367757119821e-05, "loss": 0.7112, "step": 4902 }, { "epoch": 0.345160154875044, "grad_norm": 2.0822367668151855, "learning_rate": 1.066000730009643e-05, "loss": 0.7058, "step": 4903 }, { "epoch": 0.3452305526223161, "grad_norm": 1.768386960029602, "learning_rate": 1.0658646652785943e-05, "loss": 0.5979, "step": 4904 }, { "epoch": 0.3453009503695882, "grad_norm": 2.0480387210845947, "learning_rate": 1.0657285815259104e-05, "loss": 0.6742, "step": 4905 }, { "epoch": 0.34537134811686027, "grad_norm": 1.8383363485336304, "learning_rate": 1.0655924787586664e-05, "loss": 0.715, "step": 4906 }, { "epoch": 0.34544174586413234, "grad_norm": 1.8986897468566895, "learning_rate": 1.0654563569839382e-05, "loss": 0.7237, "step": 4907 }, { "epoch": 0.3455121436114044, "grad_norm": 1.5691616535186768, "learning_rate": 1.0653202162088032e-05, "loss": 0.6887, "step": 4908 }, { "epoch": 0.3455825413586765, "grad_norm": 1.5873316526412964, "learning_rate": 1.0651840564403392e-05, "loss": 0.659, "step": 4909 }, { "epoch": 0.34565293910594863, "grad_norm": 1.8403667211532593, "learning_rate": 1.0650478776856253e-05, "loss": 0.6835, "step": 4910 }, { "epoch": 0.3457233368532207, "grad_norm": 2.116297483444214, "learning_rate": 1.0649116799517418e-05, "loss": 0.6925, "step": 4911 }, { "epoch": 0.3457937346004928, "grad_norm": 1.8498096466064453, "learning_rate": 1.0647754632457693e-05, "loss": 0.7973, "step": 4912 }, { "epoch": 0.34586413234776486, "grad_norm": 1.6956381797790527, "learning_rate": 1.0646392275747903e-05, "loss": 0.6967, "step": 4913 }, { "epoch": 0.34593453009503694, "grad_norm": 1.7659337520599365, "learning_rate": 1.0645029729458879e-05, "loss": 0.7188, "step": 4914 }, { "epoch": 0.3460049278423091, "grad_norm": 1.6292036771774292, "learning_rate": 1.0643666993661455e-05, "loss": 0.6381, "step": 4915 }, { "epoch": 0.34607532558958115, "grad_norm": 2.2593860626220703, "learning_rate": 1.0642304068426482e-05, "loss": 0.6273, "step": 4916 }, { "epoch": 0.34614572333685323, "grad_norm": 1.627012848854065, "learning_rate": 1.0640940953824824e-05, "loss": 0.7116, "step": 4917 }, { "epoch": 0.3462161210841253, "grad_norm": 2.450904130935669, "learning_rate": 1.0639577649927348e-05, "loss": 0.8027, "step": 4918 }, { "epoch": 0.3462865188313974, "grad_norm": 1.6138125658035278, "learning_rate": 1.0638214156804936e-05, "loss": 0.7456, "step": 4919 }, { "epoch": 0.34635691657866946, "grad_norm": 2.1558499336242676, "learning_rate": 1.0636850474528473e-05, "loss": 0.7141, "step": 4920 }, { "epoch": 0.3464273143259416, "grad_norm": 1.6468205451965332, "learning_rate": 1.063548660316886e-05, "loss": 0.7062, "step": 4921 }, { "epoch": 0.34649771207321367, "grad_norm": 1.6083965301513672, "learning_rate": 1.0634122542797006e-05, "loss": 0.6746, "step": 4922 }, { "epoch": 0.34656810982048575, "grad_norm": 1.7419465780258179, "learning_rate": 1.0632758293483832e-05, "loss": 0.7791, "step": 4923 }, { "epoch": 0.3466385075677578, "grad_norm": 1.727423906326294, "learning_rate": 1.0631393855300262e-05, "loss": 0.742, "step": 4924 }, { "epoch": 0.3467089053150299, "grad_norm": 2.2555882930755615, "learning_rate": 1.0630029228317237e-05, "loss": 0.6656, "step": 4925 }, { "epoch": 0.346779303062302, "grad_norm": 1.9159454107284546, "learning_rate": 1.0628664412605707e-05, "loss": 0.5121, "step": 4926 }, { "epoch": 0.3468497008095741, "grad_norm": 2.5646796226501465, "learning_rate": 1.0627299408236624e-05, "loss": 0.8422, "step": 4927 }, { "epoch": 0.3469200985568462, "grad_norm": 1.8121049404144287, "learning_rate": 1.062593421528096e-05, "loss": 0.7391, "step": 4928 }, { "epoch": 0.34699049630411827, "grad_norm": 1.7120318412780762, "learning_rate": 1.0624568833809695e-05, "loss": 0.6719, "step": 4929 }, { "epoch": 0.34706089405139035, "grad_norm": 1.960915446281433, "learning_rate": 1.0623203263893813e-05, "loss": 0.7072, "step": 4930 }, { "epoch": 0.3471312917986624, "grad_norm": 1.8869869709014893, "learning_rate": 1.0621837505604312e-05, "loss": 0.6857, "step": 4931 }, { "epoch": 0.34720168954593456, "grad_norm": 1.7891632318496704, "learning_rate": 1.0620471559012197e-05, "loss": 0.7739, "step": 4932 }, { "epoch": 0.34727208729320663, "grad_norm": 1.6827057600021362, "learning_rate": 1.0619105424188488e-05, "loss": 0.7303, "step": 4933 }, { "epoch": 0.3473424850404787, "grad_norm": 2.2321674823760986, "learning_rate": 1.0617739101204213e-05, "loss": 0.7537, "step": 4934 }, { "epoch": 0.3474128827877508, "grad_norm": 2.361264705657959, "learning_rate": 1.0616372590130403e-05, "loss": 0.722, "step": 4935 }, { "epoch": 0.34748328053502286, "grad_norm": 2.1654272079467773, "learning_rate": 1.0615005891038106e-05, "loss": 0.7622, "step": 4936 }, { "epoch": 0.34755367828229494, "grad_norm": 1.8957561254501343, "learning_rate": 1.0613639003998378e-05, "loss": 0.7003, "step": 4937 }, { "epoch": 0.3476240760295671, "grad_norm": 1.6441677808761597, "learning_rate": 1.0612271929082289e-05, "loss": 0.6332, "step": 4938 }, { "epoch": 0.34769447377683915, "grad_norm": 2.2181944847106934, "learning_rate": 1.061090466636091e-05, "loss": 0.7774, "step": 4939 }, { "epoch": 0.34776487152411123, "grad_norm": 1.6139205694198608, "learning_rate": 1.0609537215905326e-05, "loss": 0.6987, "step": 4940 }, { "epoch": 0.3478352692713833, "grad_norm": 2.062990427017212, "learning_rate": 1.0608169577786633e-05, "loss": 0.7359, "step": 4941 }, { "epoch": 0.3479056670186554, "grad_norm": 2.0108628273010254, "learning_rate": 1.0606801752075937e-05, "loss": 0.6667, "step": 4942 }, { "epoch": 0.3479760647659275, "grad_norm": 1.9303642511367798, "learning_rate": 1.0605433738844352e-05, "loss": 0.8116, "step": 4943 }, { "epoch": 0.3480464625131996, "grad_norm": 2.0251526832580566, "learning_rate": 1.0604065538163001e-05, "loss": 0.6202, "step": 4944 }, { "epoch": 0.34811686026047167, "grad_norm": 1.7679443359375, "learning_rate": 1.0602697150103018e-05, "loss": 0.7028, "step": 4945 }, { "epoch": 0.34818725800774375, "grad_norm": 1.6642388105392456, "learning_rate": 1.0601328574735546e-05, "loss": 0.7416, "step": 4946 }, { "epoch": 0.3482576557550158, "grad_norm": 2.2769105434417725, "learning_rate": 1.0599959812131744e-05, "loss": 0.7309, "step": 4947 }, { "epoch": 0.3483280535022879, "grad_norm": 1.6075729131698608, "learning_rate": 1.0598590862362766e-05, "loss": 0.7231, "step": 4948 }, { "epoch": 0.34839845124956004, "grad_norm": 1.7963075637817383, "learning_rate": 1.0597221725499793e-05, "loss": 0.6151, "step": 4949 }, { "epoch": 0.3484688489968321, "grad_norm": 1.928511619567871, "learning_rate": 1.0595852401614006e-05, "loss": 0.679, "step": 4950 }, { "epoch": 0.3485392467441042, "grad_norm": 1.6049004793167114, "learning_rate": 1.0594482890776594e-05, "loss": 0.6607, "step": 4951 }, { "epoch": 0.34860964449137627, "grad_norm": 2.041398525238037, "learning_rate": 1.0593113193058762e-05, "loss": 0.7593, "step": 4952 }, { "epoch": 0.34868004223864835, "grad_norm": 1.809326171875, "learning_rate": 1.059174330853172e-05, "loss": 0.6516, "step": 4953 }, { "epoch": 0.3487504399859204, "grad_norm": 1.8985366821289062, "learning_rate": 1.059037323726669e-05, "loss": 0.7381, "step": 4954 }, { "epoch": 0.34882083773319256, "grad_norm": 1.9927890300750732, "learning_rate": 1.0589002979334905e-05, "loss": 0.6697, "step": 4955 }, { "epoch": 0.34889123548046463, "grad_norm": 1.905551552772522, "learning_rate": 1.0587632534807605e-05, "loss": 0.6098, "step": 4956 }, { "epoch": 0.3489616332277367, "grad_norm": 1.8438937664031982, "learning_rate": 1.0586261903756039e-05, "loss": 0.7986, "step": 4957 }, { "epoch": 0.3490320309750088, "grad_norm": 1.3952146768569946, "learning_rate": 1.0584891086251469e-05, "loss": 0.7811, "step": 4958 }, { "epoch": 0.34910242872228087, "grad_norm": 2.032818555831909, "learning_rate": 1.0583520082365166e-05, "loss": 0.5857, "step": 4959 }, { "epoch": 0.349172826469553, "grad_norm": 1.8293943405151367, "learning_rate": 1.0582148892168407e-05, "loss": 0.8511, "step": 4960 }, { "epoch": 0.3492432242168251, "grad_norm": 1.8918215036392212, "learning_rate": 1.0580777515732484e-05, "loss": 0.6511, "step": 4961 }, { "epoch": 0.34931362196409715, "grad_norm": 2.049757480621338, "learning_rate": 1.0579405953128694e-05, "loss": 0.8176, "step": 4962 }, { "epoch": 0.34938401971136923, "grad_norm": 1.7688411474227905, "learning_rate": 1.057803420442835e-05, "loss": 0.6658, "step": 4963 }, { "epoch": 0.3494544174586413, "grad_norm": 1.9594130516052246, "learning_rate": 1.0576662269702762e-05, "loss": 0.7598, "step": 4964 }, { "epoch": 0.3495248152059134, "grad_norm": 1.6519526243209839, "learning_rate": 1.0575290149023268e-05, "loss": 0.7932, "step": 4965 }, { "epoch": 0.3495952129531855, "grad_norm": 1.974048376083374, "learning_rate": 1.0573917842461197e-05, "loss": 0.8494, "step": 4966 }, { "epoch": 0.3496656107004576, "grad_norm": 1.7019762992858887, "learning_rate": 1.0572545350087903e-05, "loss": 0.69, "step": 4967 }, { "epoch": 0.3497360084477297, "grad_norm": 1.7606393098831177, "learning_rate": 1.0571172671974742e-05, "loss": 0.6524, "step": 4968 }, { "epoch": 0.34980640619500175, "grad_norm": 2.074169635772705, "learning_rate": 1.0569799808193077e-05, "loss": 0.7006, "step": 4969 }, { "epoch": 0.34987680394227383, "grad_norm": 2.073413372039795, "learning_rate": 1.0568426758814288e-05, "loss": 0.6021, "step": 4970 }, { "epoch": 0.34994720168954596, "grad_norm": 1.7236545085906982, "learning_rate": 1.0567053523909762e-05, "loss": 0.7303, "step": 4971 }, { "epoch": 0.35001759943681804, "grad_norm": 1.7342137098312378, "learning_rate": 1.056568010355089e-05, "loss": 0.5931, "step": 4972 }, { "epoch": 0.3500879971840901, "grad_norm": 1.5078591108322144, "learning_rate": 1.056430649780908e-05, "loss": 0.7171, "step": 4973 }, { "epoch": 0.3501583949313622, "grad_norm": 1.891093134880066, "learning_rate": 1.0562932706755749e-05, "loss": 0.6718, "step": 4974 }, { "epoch": 0.35022879267863427, "grad_norm": 1.6503496170043945, "learning_rate": 1.056155873046232e-05, "loss": 0.7106, "step": 4975 }, { "epoch": 0.35029919042590635, "grad_norm": 1.7664936780929565, "learning_rate": 1.0560184569000224e-05, "loss": 0.6401, "step": 4976 }, { "epoch": 0.3503695881731785, "grad_norm": 1.81720769405365, "learning_rate": 1.0558810222440913e-05, "loss": 0.7601, "step": 4977 }, { "epoch": 0.35043998592045056, "grad_norm": 1.6572479009628296, "learning_rate": 1.055743569085583e-05, "loss": 0.7439, "step": 4978 }, { "epoch": 0.35051038366772264, "grad_norm": 1.8270928859710693, "learning_rate": 1.0556060974316449e-05, "loss": 0.6478, "step": 4979 }, { "epoch": 0.3505807814149947, "grad_norm": 1.877339243888855, "learning_rate": 1.0554686072894233e-05, "loss": 0.7492, "step": 4980 }, { "epoch": 0.3506511791622668, "grad_norm": 1.868634581565857, "learning_rate": 1.0553310986660669e-05, "loss": 0.657, "step": 4981 }, { "epoch": 0.35072157690953887, "grad_norm": 1.8079792261123657, "learning_rate": 1.0551935715687252e-05, "loss": 0.6569, "step": 4982 }, { "epoch": 0.350791974656811, "grad_norm": 1.6973915100097656, "learning_rate": 1.0550560260045477e-05, "loss": 0.7342, "step": 4983 }, { "epoch": 0.3508623724040831, "grad_norm": 1.9286606311798096, "learning_rate": 1.054918461980686e-05, "loss": 0.6673, "step": 4984 }, { "epoch": 0.35093277015135516, "grad_norm": 1.8243244886398315, "learning_rate": 1.0547808795042922e-05, "loss": 0.6629, "step": 4985 }, { "epoch": 0.35100316789862723, "grad_norm": 2.1092708110809326, "learning_rate": 1.0546432785825188e-05, "loss": 0.7852, "step": 4986 }, { "epoch": 0.3510735656458993, "grad_norm": 2.079528331756592, "learning_rate": 1.0545056592225204e-05, "loss": 0.7063, "step": 4987 }, { "epoch": 0.35114396339317144, "grad_norm": 1.8954263925552368, "learning_rate": 1.0543680214314517e-05, "loss": 0.8622, "step": 4988 }, { "epoch": 0.3512143611404435, "grad_norm": 2.7599573135375977, "learning_rate": 1.0542303652164687e-05, "loss": 0.7335, "step": 4989 }, { "epoch": 0.3512847588877156, "grad_norm": 1.9689878225326538, "learning_rate": 1.054092690584728e-05, "loss": 0.73, "step": 4990 }, { "epoch": 0.3513551566349877, "grad_norm": 1.8023734092712402, "learning_rate": 1.0539549975433876e-05, "loss": 0.6132, "step": 4991 }, { "epoch": 0.35142555438225975, "grad_norm": 2.0936477184295654, "learning_rate": 1.0538172860996065e-05, "loss": 0.6629, "step": 4992 }, { "epoch": 0.35149595212953183, "grad_norm": 1.7051453590393066, "learning_rate": 1.0536795562605443e-05, "loss": 0.6567, "step": 4993 }, { "epoch": 0.35156634987680396, "grad_norm": 1.686327576637268, "learning_rate": 1.0535418080333614e-05, "loss": 0.6859, "step": 4994 }, { "epoch": 0.35163674762407604, "grad_norm": 1.7864043712615967, "learning_rate": 1.0534040414252202e-05, "loss": 0.6901, "step": 4995 }, { "epoch": 0.3517071453713481, "grad_norm": 2.174367666244507, "learning_rate": 1.0532662564432823e-05, "loss": 0.7776, "step": 4996 }, { "epoch": 0.3517775431186202, "grad_norm": 2.069237470626831, "learning_rate": 1.053128453094712e-05, "loss": 0.7473, "step": 4997 }, { "epoch": 0.3518479408658923, "grad_norm": 1.6997774839401245, "learning_rate": 1.0529906313866738e-05, "loss": 0.6948, "step": 4998 }, { "epoch": 0.3519183386131644, "grad_norm": 1.7867473363876343, "learning_rate": 1.0528527913263327e-05, "loss": 0.8076, "step": 4999 }, { "epoch": 0.3519887363604365, "grad_norm": 1.7589291334152222, "learning_rate": 1.0527149329208554e-05, "loss": 0.7408, "step": 5000 }, { "epoch": 0.35205913410770856, "grad_norm": 1.6534780263900757, "learning_rate": 1.0525770561774095e-05, "loss": 0.6576, "step": 5001 }, { "epoch": 0.35212953185498064, "grad_norm": 1.945206880569458, "learning_rate": 1.052439161103163e-05, "loss": 0.7183, "step": 5002 }, { "epoch": 0.3521999296022527, "grad_norm": 1.6801036596298218, "learning_rate": 1.0523012477052852e-05, "loss": 0.6556, "step": 5003 }, { "epoch": 0.3522703273495248, "grad_norm": 1.5722590684890747, "learning_rate": 1.052163315990947e-05, "loss": 0.88, "step": 5004 }, { "epoch": 0.3523407250967969, "grad_norm": 2.312286376953125, "learning_rate": 1.0520253659673186e-05, "loss": 0.7122, "step": 5005 }, { "epoch": 0.352411122844069, "grad_norm": 1.9871277809143066, "learning_rate": 1.0518873976415727e-05, "loss": 0.6826, "step": 5006 }, { "epoch": 0.3524815205913411, "grad_norm": 2.3135337829589844, "learning_rate": 1.0517494110208823e-05, "loss": 0.7125, "step": 5007 }, { "epoch": 0.35255191833861316, "grad_norm": 1.9034613370895386, "learning_rate": 1.0516114061124214e-05, "loss": 0.6573, "step": 5008 }, { "epoch": 0.35262231608588523, "grad_norm": 2.024916410446167, "learning_rate": 1.051473382923365e-05, "loss": 0.6537, "step": 5009 }, { "epoch": 0.3526927138331573, "grad_norm": 2.604886770248413, "learning_rate": 1.0513353414608895e-05, "loss": 0.7455, "step": 5010 }, { "epoch": 0.35276311158042944, "grad_norm": 1.8917475938796997, "learning_rate": 1.0511972817321712e-05, "loss": 0.7108, "step": 5011 }, { "epoch": 0.3528335093277015, "grad_norm": 2.2989869117736816, "learning_rate": 1.0510592037443881e-05, "loss": 0.6491, "step": 5012 }, { "epoch": 0.3529039070749736, "grad_norm": 1.7837013006210327, "learning_rate": 1.050921107504719e-05, "loss": 0.7231, "step": 5013 }, { "epoch": 0.3529743048222457, "grad_norm": 2.028761863708496, "learning_rate": 1.0507829930203438e-05, "loss": 0.7175, "step": 5014 }, { "epoch": 0.35304470256951775, "grad_norm": 2.418998956680298, "learning_rate": 1.050644860298443e-05, "loss": 0.827, "step": 5015 }, { "epoch": 0.3531151003167899, "grad_norm": 2.2289206981658936, "learning_rate": 1.0505067093461987e-05, "loss": 0.6976, "step": 5016 }, { "epoch": 0.35318549806406196, "grad_norm": 2.466489791870117, "learning_rate": 1.0503685401707926e-05, "loss": 0.6776, "step": 5017 }, { "epoch": 0.35325589581133404, "grad_norm": 1.9794361591339111, "learning_rate": 1.0502303527794093e-05, "loss": 0.6602, "step": 5018 }, { "epoch": 0.3533262935586061, "grad_norm": 2.1910600662231445, "learning_rate": 1.0500921471792326e-05, "loss": 0.7164, "step": 5019 }, { "epoch": 0.3533966913058782, "grad_norm": 1.7454729080200195, "learning_rate": 1.0499539233774478e-05, "loss": 0.6772, "step": 5020 }, { "epoch": 0.3534670890531503, "grad_norm": 2.1876533031463623, "learning_rate": 1.0498156813812419e-05, "loss": 0.686, "step": 5021 }, { "epoch": 0.3535374868004224, "grad_norm": 1.730958104133606, "learning_rate": 1.049677421197802e-05, "loss": 0.7212, "step": 5022 }, { "epoch": 0.3536078845476945, "grad_norm": 2.426898241043091, "learning_rate": 1.049539142834316e-05, "loss": 0.8384, "step": 5023 }, { "epoch": 0.35367828229496656, "grad_norm": 1.7746762037277222, "learning_rate": 1.0494008462979735e-05, "loss": 0.7837, "step": 5024 }, { "epoch": 0.35374868004223864, "grad_norm": 2.1444036960601807, "learning_rate": 1.0492625315959647e-05, "loss": 0.7332, "step": 5025 }, { "epoch": 0.3538190777895107, "grad_norm": 1.8774069547653198, "learning_rate": 1.0491241987354805e-05, "loss": 0.7557, "step": 5026 }, { "epoch": 0.35388947553678285, "grad_norm": 1.6719549894332886, "learning_rate": 1.0489858477237129e-05, "loss": 0.817, "step": 5027 }, { "epoch": 0.3539598732840549, "grad_norm": 2.0939130783081055, "learning_rate": 1.0488474785678553e-05, "loss": 0.6269, "step": 5028 }, { "epoch": 0.354030271031327, "grad_norm": 1.8406318426132202, "learning_rate": 1.0487090912751009e-05, "loss": 0.723, "step": 5029 }, { "epoch": 0.3541006687785991, "grad_norm": 2.0563204288482666, "learning_rate": 1.0485706858526454e-05, "loss": 0.7694, "step": 5030 }, { "epoch": 0.35417106652587116, "grad_norm": 1.8692225217819214, "learning_rate": 1.048432262307684e-05, "loss": 0.6323, "step": 5031 }, { "epoch": 0.35424146427314324, "grad_norm": 1.688270926475525, "learning_rate": 1.0482938206474138e-05, "loss": 0.6764, "step": 5032 }, { "epoch": 0.35431186202041537, "grad_norm": 1.6144022941589355, "learning_rate": 1.0481553608790327e-05, "loss": 0.7987, "step": 5033 }, { "epoch": 0.35438225976768745, "grad_norm": 2.513026475906372, "learning_rate": 1.0480168830097388e-05, "loss": 0.684, "step": 5034 }, { "epoch": 0.3544526575149595, "grad_norm": 2.033498764038086, "learning_rate": 1.0478783870467322e-05, "loss": 0.705, "step": 5035 }, { "epoch": 0.3545230552622316, "grad_norm": 2.6701114177703857, "learning_rate": 1.047739872997213e-05, "loss": 0.6523, "step": 5036 }, { "epoch": 0.3545934530095037, "grad_norm": 2.1940674781799316, "learning_rate": 1.047601340868383e-05, "loss": 0.7225, "step": 5037 }, { "epoch": 0.35466385075677576, "grad_norm": 1.7631759643554688, "learning_rate": 1.0474627906674443e-05, "loss": 0.6927, "step": 5038 }, { "epoch": 0.3547342485040479, "grad_norm": 1.8962273597717285, "learning_rate": 1.0473242224016006e-05, "loss": 0.5739, "step": 5039 }, { "epoch": 0.35480464625131997, "grad_norm": 1.8077579736709595, "learning_rate": 1.047185636078056e-05, "loss": 0.7024, "step": 5040 }, { "epoch": 0.35487504399859204, "grad_norm": 2.057858943939209, "learning_rate": 1.0470470317040157e-05, "loss": 0.7096, "step": 5041 }, { "epoch": 0.3549454417458641, "grad_norm": 2.021698236465454, "learning_rate": 1.0469084092866858e-05, "loss": 0.6867, "step": 5042 }, { "epoch": 0.3550158394931362, "grad_norm": 1.8610731363296509, "learning_rate": 1.0467697688332737e-05, "loss": 0.6808, "step": 5043 }, { "epoch": 0.35508623724040833, "grad_norm": 2.360499858856201, "learning_rate": 1.0466311103509873e-05, "loss": 0.8005, "step": 5044 }, { "epoch": 0.3551566349876804, "grad_norm": 1.832512617111206, "learning_rate": 1.0464924338470354e-05, "loss": 0.6549, "step": 5045 }, { "epoch": 0.3552270327349525, "grad_norm": 1.9466334581375122, "learning_rate": 1.0463537393286283e-05, "loss": 0.6955, "step": 5046 }, { "epoch": 0.35529743048222456, "grad_norm": 1.568792700767517, "learning_rate": 1.0462150268029764e-05, "loss": 0.6531, "step": 5047 }, { "epoch": 0.35536782822949664, "grad_norm": 1.9727941751480103, "learning_rate": 1.0460762962772916e-05, "loss": 0.7236, "step": 5048 }, { "epoch": 0.3554382259767687, "grad_norm": 1.7591361999511719, "learning_rate": 1.045937547758787e-05, "loss": 0.8445, "step": 5049 }, { "epoch": 0.35550862372404085, "grad_norm": 2.0757646560668945, "learning_rate": 1.045798781254676e-05, "loss": 0.677, "step": 5050 }, { "epoch": 0.35557902147131293, "grad_norm": 1.5907691717147827, "learning_rate": 1.045659996772173e-05, "loss": 0.6407, "step": 5051 }, { "epoch": 0.355649419218585, "grad_norm": 1.9472310543060303, "learning_rate": 1.0455211943184937e-05, "loss": 0.6693, "step": 5052 }, { "epoch": 0.3557198169658571, "grad_norm": 1.6415135860443115, "learning_rate": 1.0453823739008548e-05, "loss": 0.7297, "step": 5053 }, { "epoch": 0.35579021471312916, "grad_norm": 1.8781920671463013, "learning_rate": 1.0452435355264733e-05, "loss": 0.6319, "step": 5054 }, { "epoch": 0.3558606124604013, "grad_norm": 1.8058972358703613, "learning_rate": 1.0451046792025679e-05, "loss": 0.7409, "step": 5055 }, { "epoch": 0.35593101020767337, "grad_norm": 1.9828354120254517, "learning_rate": 1.0449658049363575e-05, "loss": 0.68, "step": 5056 }, { "epoch": 0.35600140795494545, "grad_norm": 7.528056621551514, "learning_rate": 1.0448269127350624e-05, "loss": 0.7368, "step": 5057 }, { "epoch": 0.3560718057022175, "grad_norm": 1.6437970399856567, "learning_rate": 1.044688002605904e-05, "loss": 0.6721, "step": 5058 }, { "epoch": 0.3561422034494896, "grad_norm": 2.325629472732544, "learning_rate": 1.044549074556104e-05, "loss": 0.7894, "step": 5059 }, { "epoch": 0.3562126011967617, "grad_norm": 2.0565502643585205, "learning_rate": 1.0444101285928858e-05, "loss": 0.7555, "step": 5060 }, { "epoch": 0.3562829989440338, "grad_norm": 1.746848225593567, "learning_rate": 1.0442711647234732e-05, "loss": 0.5482, "step": 5061 }, { "epoch": 0.3563533966913059, "grad_norm": 2.01792049407959, "learning_rate": 1.0441321829550906e-05, "loss": 0.7306, "step": 5062 }, { "epoch": 0.35642379443857797, "grad_norm": 1.982556700706482, "learning_rate": 1.0439931832949642e-05, "loss": 0.7922, "step": 5063 }, { "epoch": 0.35649419218585005, "grad_norm": 2.311896800994873, "learning_rate": 1.0438541657503209e-05, "loss": 0.6471, "step": 5064 }, { "epoch": 0.3565645899331221, "grad_norm": 2.4444401264190674, "learning_rate": 1.0437151303283879e-05, "loss": 0.7341, "step": 5065 }, { "epoch": 0.3566349876803942, "grad_norm": 2.190674066543579, "learning_rate": 1.0435760770363941e-05, "loss": 0.7284, "step": 5066 }, { "epoch": 0.35670538542766633, "grad_norm": 1.95771062374115, "learning_rate": 1.043437005881569e-05, "loss": 0.7187, "step": 5067 }, { "epoch": 0.3567757831749384, "grad_norm": 1.950106143951416, "learning_rate": 1.0432979168711425e-05, "loss": 0.6703, "step": 5068 }, { "epoch": 0.3568461809222105, "grad_norm": 1.7830218076705933, "learning_rate": 1.0431588100123468e-05, "loss": 0.7029, "step": 5069 }, { "epoch": 0.35691657866948256, "grad_norm": 2.2750158309936523, "learning_rate": 1.0430196853124137e-05, "loss": 0.6864, "step": 5070 }, { "epoch": 0.35698697641675464, "grad_norm": 2.7766807079315186, "learning_rate": 1.0428805427785765e-05, "loss": 0.6459, "step": 5071 }, { "epoch": 0.3570573741640268, "grad_norm": 2.0123088359832764, "learning_rate": 1.0427413824180691e-05, "loss": 0.6941, "step": 5072 }, { "epoch": 0.35712777191129885, "grad_norm": 1.961926817893982, "learning_rate": 1.0426022042381271e-05, "loss": 0.7389, "step": 5073 }, { "epoch": 0.35719816965857093, "grad_norm": 1.8447461128234863, "learning_rate": 1.0424630082459862e-05, "loss": 0.6869, "step": 5074 }, { "epoch": 0.357268567405843, "grad_norm": 1.6866005659103394, "learning_rate": 1.0423237944488833e-05, "loss": 0.7269, "step": 5075 }, { "epoch": 0.3573389651531151, "grad_norm": 2.3867897987365723, "learning_rate": 1.0421845628540565e-05, "loss": 0.6802, "step": 5076 }, { "epoch": 0.35740936290038716, "grad_norm": 5.004610061645508, "learning_rate": 1.042045313468744e-05, "loss": 0.6485, "step": 5077 }, { "epoch": 0.3574797606476593, "grad_norm": 1.6532641649246216, "learning_rate": 1.041906046300186e-05, "loss": 0.7116, "step": 5078 }, { "epoch": 0.35755015839493137, "grad_norm": 1.7845133543014526, "learning_rate": 1.0417667613556232e-05, "loss": 0.7265, "step": 5079 }, { "epoch": 0.35762055614220345, "grad_norm": 1.8873200416564941, "learning_rate": 1.0416274586422966e-05, "loss": 0.6477, "step": 5080 }, { "epoch": 0.3576909538894755, "grad_norm": 1.813827633857727, "learning_rate": 1.0414881381674493e-05, "loss": 0.8561, "step": 5081 }, { "epoch": 0.3577613516367476, "grad_norm": 1.6803306341171265, "learning_rate": 1.0413487999383243e-05, "loss": 0.6258, "step": 5082 }, { "epoch": 0.35783174938401974, "grad_norm": 2.1102070808410645, "learning_rate": 1.0412094439621662e-05, "loss": 0.5803, "step": 5083 }, { "epoch": 0.3579021471312918, "grad_norm": 1.9686511754989624, "learning_rate": 1.04107007024622e-05, "loss": 0.8176, "step": 5084 }, { "epoch": 0.3579725448785639, "grad_norm": 1.8303474187850952, "learning_rate": 1.040930678797732e-05, "loss": 0.8294, "step": 5085 }, { "epoch": 0.35804294262583597, "grad_norm": 1.9779349565505981, "learning_rate": 1.0407912696239492e-05, "loss": 0.7271, "step": 5086 }, { "epoch": 0.35811334037310805, "grad_norm": 1.5680946111679077, "learning_rate": 1.0406518427321199e-05, "loss": 0.9393, "step": 5087 }, { "epoch": 0.3581837381203801, "grad_norm": 2.2610929012298584, "learning_rate": 1.0405123981294923e-05, "loss": 0.7643, "step": 5088 }, { "epoch": 0.35825413586765226, "grad_norm": 1.921730637550354, "learning_rate": 1.040372935823317e-05, "loss": 0.7507, "step": 5089 }, { "epoch": 0.35832453361492433, "grad_norm": 2.439023017883301, "learning_rate": 1.0402334558208446e-05, "loss": 0.6568, "step": 5090 }, { "epoch": 0.3583949313621964, "grad_norm": 1.7815322875976562, "learning_rate": 1.0400939581293264e-05, "loss": 0.6568, "step": 5091 }, { "epoch": 0.3584653291094685, "grad_norm": 1.8015146255493164, "learning_rate": 1.0399544427560156e-05, "loss": 0.7945, "step": 5092 }, { "epoch": 0.35853572685674057, "grad_norm": 2.2215230464935303, "learning_rate": 1.0398149097081655e-05, "loss": 0.6578, "step": 5093 }, { "epoch": 0.3586061246040127, "grad_norm": 1.681355357170105, "learning_rate": 1.0396753589930302e-05, "loss": 0.6993, "step": 5094 }, { "epoch": 0.3586765223512848, "grad_norm": 2.332341432571411, "learning_rate": 1.0395357906178655e-05, "loss": 0.5606, "step": 5095 }, { "epoch": 0.35874692009855685, "grad_norm": 1.8759791851043701, "learning_rate": 1.0393962045899274e-05, "loss": 0.7579, "step": 5096 }, { "epoch": 0.35881731784582893, "grad_norm": 1.8902686834335327, "learning_rate": 1.0392566009164735e-05, "loss": 0.8334, "step": 5097 }, { "epoch": 0.358887715593101, "grad_norm": 2.6006693840026855, "learning_rate": 1.0391169796047617e-05, "loss": 0.7323, "step": 5098 }, { "epoch": 0.3589581133403731, "grad_norm": 1.9719083309173584, "learning_rate": 1.0389773406620507e-05, "loss": 0.7177, "step": 5099 }, { "epoch": 0.3590285110876452, "grad_norm": 2.0869803428649902, "learning_rate": 1.0388376840956009e-05, "loss": 0.7241, "step": 5100 }, { "epoch": 0.3590989088349173, "grad_norm": 1.7082873582839966, "learning_rate": 1.038698009912673e-05, "loss": 0.664, "step": 5101 }, { "epoch": 0.3591693065821894, "grad_norm": 1.764224886894226, "learning_rate": 1.038558318120529e-05, "loss": 0.7395, "step": 5102 }, { "epoch": 0.35923970432946145, "grad_norm": 1.9368475675582886, "learning_rate": 1.0384186087264314e-05, "loss": 0.7883, "step": 5103 }, { "epoch": 0.35931010207673353, "grad_norm": 1.6472523212432861, "learning_rate": 1.0382788817376435e-05, "loss": 0.6776, "step": 5104 }, { "epoch": 0.3593804998240056, "grad_norm": 1.8840534687042236, "learning_rate": 1.0381391371614305e-05, "loss": 0.7309, "step": 5105 }, { "epoch": 0.35945089757127774, "grad_norm": 2.207127809524536, "learning_rate": 1.0379993750050575e-05, "loss": 0.8487, "step": 5106 }, { "epoch": 0.3595212953185498, "grad_norm": 2.1657581329345703, "learning_rate": 1.0378595952757908e-05, "loss": 0.779, "step": 5107 }, { "epoch": 0.3595916930658219, "grad_norm": 1.737626075744629, "learning_rate": 1.0377197979808977e-05, "loss": 0.7341, "step": 5108 }, { "epoch": 0.35966209081309397, "grad_norm": 1.7929283380508423, "learning_rate": 1.0375799831276466e-05, "loss": 0.7297, "step": 5109 }, { "epoch": 0.35973248856036605, "grad_norm": 1.8559101819992065, "learning_rate": 1.0374401507233064e-05, "loss": 0.6952, "step": 5110 }, { "epoch": 0.3598028863076382, "grad_norm": 2.2286159992218018, "learning_rate": 1.0373003007751472e-05, "loss": 0.7954, "step": 5111 }, { "epoch": 0.35987328405491026, "grad_norm": 2.4177775382995605, "learning_rate": 1.0371604332904397e-05, "loss": 0.5478, "step": 5112 }, { "epoch": 0.35994368180218234, "grad_norm": 1.763763666152954, "learning_rate": 1.037020548276456e-05, "loss": 0.6865, "step": 5113 }, { "epoch": 0.3600140795494544, "grad_norm": 1.6133149862289429, "learning_rate": 1.036880645740469e-05, "loss": 0.7598, "step": 5114 }, { "epoch": 0.3600844772967265, "grad_norm": 1.6351861953735352, "learning_rate": 1.036740725689752e-05, "loss": 0.6882, "step": 5115 }, { "epoch": 0.36015487504399857, "grad_norm": 3.065706491470337, "learning_rate": 1.0366007881315794e-05, "loss": 0.6998, "step": 5116 }, { "epoch": 0.3602252727912707, "grad_norm": 1.978210687637329, "learning_rate": 1.0364608330732274e-05, "loss": 0.7188, "step": 5117 }, { "epoch": 0.3602956705385428, "grad_norm": 2.0100600719451904, "learning_rate": 1.0363208605219718e-05, "loss": 0.6597, "step": 5118 }, { "epoch": 0.36036606828581486, "grad_norm": 1.7436565160751343, "learning_rate": 1.0361808704850901e-05, "loss": 0.7224, "step": 5119 }, { "epoch": 0.36043646603308693, "grad_norm": 1.8930715322494507, "learning_rate": 1.0360408629698603e-05, "loss": 0.6482, "step": 5120 }, { "epoch": 0.360506863780359, "grad_norm": 1.902958631515503, "learning_rate": 1.0359008379835619e-05, "loss": 0.776, "step": 5121 }, { "epoch": 0.36057726152763114, "grad_norm": 1.7349053621292114, "learning_rate": 1.0357607955334747e-05, "loss": 0.6963, "step": 5122 }, { "epoch": 0.3606476592749032, "grad_norm": 1.5128800868988037, "learning_rate": 1.0356207356268797e-05, "loss": 0.7143, "step": 5123 }, { "epoch": 0.3607180570221753, "grad_norm": 2.214149236679077, "learning_rate": 1.0354806582710586e-05, "loss": 0.6356, "step": 5124 }, { "epoch": 0.3607884547694474, "grad_norm": 2.167600631713867, "learning_rate": 1.0353405634732943e-05, "loss": 0.7569, "step": 5125 }, { "epoch": 0.36085885251671945, "grad_norm": 1.548780918121338, "learning_rate": 1.0352004512408703e-05, "loss": 0.6386, "step": 5126 }, { "epoch": 0.36092925026399153, "grad_norm": 1.8998831510543823, "learning_rate": 1.0350603215810714e-05, "loss": 0.6912, "step": 5127 }, { "epoch": 0.36099964801126366, "grad_norm": 2.008417844772339, "learning_rate": 1.0349201745011829e-05, "loss": 0.667, "step": 5128 }, { "epoch": 0.36107004575853574, "grad_norm": 1.9682005643844604, "learning_rate": 1.034780010008491e-05, "loss": 0.7774, "step": 5129 }, { "epoch": 0.3611404435058078, "grad_norm": 1.5230432748794556, "learning_rate": 1.0346398281102835e-05, "loss": 0.5395, "step": 5130 }, { "epoch": 0.3612108412530799, "grad_norm": 1.870314359664917, "learning_rate": 1.034499628813848e-05, "loss": 0.6736, "step": 5131 }, { "epoch": 0.361281239000352, "grad_norm": 2.881248712539673, "learning_rate": 1.0343594121264742e-05, "loss": 0.7385, "step": 5132 }, { "epoch": 0.36135163674762405, "grad_norm": 1.882060170173645, "learning_rate": 1.0342191780554513e-05, "loss": 0.7054, "step": 5133 }, { "epoch": 0.3614220344948962, "grad_norm": 3.220262289047241, "learning_rate": 1.0340789266080709e-05, "loss": 0.7046, "step": 5134 }, { "epoch": 0.36149243224216826, "grad_norm": 1.9311374425888062, "learning_rate": 1.0339386577916246e-05, "loss": 0.6562, "step": 5135 }, { "epoch": 0.36156282998944034, "grad_norm": 1.919357180595398, "learning_rate": 1.033798371613405e-05, "loss": 0.7437, "step": 5136 }, { "epoch": 0.3616332277367124, "grad_norm": 2.036996364593506, "learning_rate": 1.0336580680807057e-05, "loss": 0.7226, "step": 5137 }, { "epoch": 0.3617036254839845, "grad_norm": 1.9273827075958252, "learning_rate": 1.0335177472008213e-05, "loss": 0.7425, "step": 5138 }, { "epoch": 0.3617740232312566, "grad_norm": 2.3654074668884277, "learning_rate": 1.0333774089810473e-05, "loss": 0.7656, "step": 5139 }, { "epoch": 0.3618444209785287, "grad_norm": 1.8453316688537598, "learning_rate": 1.0332370534286795e-05, "loss": 0.6632, "step": 5140 }, { "epoch": 0.3619148187258008, "grad_norm": 1.9159170389175415, "learning_rate": 1.0330966805510159e-05, "loss": 0.769, "step": 5141 }, { "epoch": 0.36198521647307286, "grad_norm": 1.5500129461288452, "learning_rate": 1.032956290355354e-05, "loss": 0.6213, "step": 5142 }, { "epoch": 0.36205561422034493, "grad_norm": 1.5804864168167114, "learning_rate": 1.032815882848993e-05, "loss": 0.7313, "step": 5143 }, { "epoch": 0.362126011967617, "grad_norm": 1.9040833711624146, "learning_rate": 1.032675458039233e-05, "loss": 0.8493, "step": 5144 }, { "epoch": 0.36219640971488914, "grad_norm": 1.911996841430664, "learning_rate": 1.0325350159333748e-05, "loss": 0.8525, "step": 5145 }, { "epoch": 0.3622668074621612, "grad_norm": 1.8081555366516113, "learning_rate": 1.0323945565387194e-05, "loss": 0.7221, "step": 5146 }, { "epoch": 0.3623372052094333, "grad_norm": 2.235577344894409, "learning_rate": 1.0322540798625703e-05, "loss": 0.6886, "step": 5147 }, { "epoch": 0.3624076029567054, "grad_norm": 1.6317269802093506, "learning_rate": 1.0321135859122309e-05, "loss": 0.7168, "step": 5148 }, { "epoch": 0.36247800070397745, "grad_norm": 1.6889580488204956, "learning_rate": 1.031973074695005e-05, "loss": 0.7255, "step": 5149 }, { "epoch": 0.3625483984512496, "grad_norm": 1.999009609222412, "learning_rate": 1.0318325462181984e-05, "loss": 0.7288, "step": 5150 }, { "epoch": 0.36261879619852166, "grad_norm": 1.9690163135528564, "learning_rate": 1.0316920004891172e-05, "loss": 0.8263, "step": 5151 }, { "epoch": 0.36268919394579374, "grad_norm": 1.8956488370895386, "learning_rate": 1.0315514375150685e-05, "loss": 0.7811, "step": 5152 }, { "epoch": 0.3627595916930658, "grad_norm": 2.3836379051208496, "learning_rate": 1.0314108573033605e-05, "loss": 0.7319, "step": 5153 }, { "epoch": 0.3628299894403379, "grad_norm": 1.6391150951385498, "learning_rate": 1.0312702598613018e-05, "loss": 0.5589, "step": 5154 }, { "epoch": 0.36290038718761, "grad_norm": 1.8116405010223389, "learning_rate": 1.0311296451962022e-05, "loss": 0.7496, "step": 5155 }, { "epoch": 0.3629707849348821, "grad_norm": 1.7769863605499268, "learning_rate": 1.0309890133153724e-05, "loss": 0.7536, "step": 5156 }, { "epoch": 0.3630411826821542, "grad_norm": 2.109684705734253, "learning_rate": 1.0308483642261241e-05, "loss": 0.7226, "step": 5157 }, { "epoch": 0.36311158042942626, "grad_norm": 1.6316914558410645, "learning_rate": 1.0307076979357696e-05, "loss": 0.6734, "step": 5158 }, { "epoch": 0.36318197817669834, "grad_norm": 1.9353424310684204, "learning_rate": 1.0305670144516225e-05, "loss": 0.7028, "step": 5159 }, { "epoch": 0.3632523759239704, "grad_norm": 1.9748148918151855, "learning_rate": 1.030426313780997e-05, "loss": 0.6927, "step": 5160 }, { "epoch": 0.3633227736712425, "grad_norm": 1.975672960281372, "learning_rate": 1.030285595931208e-05, "loss": 0.7357, "step": 5161 }, { "epoch": 0.3633931714185146, "grad_norm": 2.2631421089172363, "learning_rate": 1.030144860909572e-05, "loss": 0.5991, "step": 5162 }, { "epoch": 0.3634635691657867, "grad_norm": 2.3809468746185303, "learning_rate": 1.0300041087234057e-05, "loss": 0.6569, "step": 5163 }, { "epoch": 0.3635339669130588, "grad_norm": 1.8027180433273315, "learning_rate": 1.0298633393800266e-05, "loss": 0.72, "step": 5164 }, { "epoch": 0.36360436466033086, "grad_norm": 1.7178940773010254, "learning_rate": 1.0297225528867538e-05, "loss": 0.754, "step": 5165 }, { "epoch": 0.36367476240760294, "grad_norm": 2.095451593399048, "learning_rate": 1.029581749250907e-05, "loss": 0.8501, "step": 5166 }, { "epoch": 0.36374516015487507, "grad_norm": 1.754336953163147, "learning_rate": 1.0294409284798066e-05, "loss": 0.6801, "step": 5167 }, { "epoch": 0.36381555790214715, "grad_norm": 2.1442601680755615, "learning_rate": 1.029300090580774e-05, "loss": 0.6978, "step": 5168 }, { "epoch": 0.3638859556494192, "grad_norm": 1.7362371683120728, "learning_rate": 1.0291592355611314e-05, "loss": 0.6443, "step": 5169 }, { "epoch": 0.3639563533966913, "grad_norm": 1.7638697624206543, "learning_rate": 1.0290183634282019e-05, "loss": 0.6343, "step": 5170 }, { "epoch": 0.3640267511439634, "grad_norm": 2.297386884689331, "learning_rate": 1.0288774741893097e-05, "loss": 0.7595, "step": 5171 }, { "epoch": 0.36409714889123546, "grad_norm": 2.083707094192505, "learning_rate": 1.0287365678517799e-05, "loss": 0.6549, "step": 5172 }, { "epoch": 0.3641675466385076, "grad_norm": 1.8162933588027954, "learning_rate": 1.0285956444229383e-05, "loss": 0.7719, "step": 5173 }, { "epoch": 0.36423794438577967, "grad_norm": 4.074939250946045, "learning_rate": 1.0284547039101114e-05, "loss": 0.6464, "step": 5174 }, { "epoch": 0.36430834213305174, "grad_norm": 2.0279970169067383, "learning_rate": 1.0283137463206272e-05, "loss": 0.7168, "step": 5175 }, { "epoch": 0.3643787398803238, "grad_norm": 2.039433240890503, "learning_rate": 1.0281727716618136e-05, "loss": 0.7254, "step": 5176 }, { "epoch": 0.3644491376275959, "grad_norm": 1.6674975156784058, "learning_rate": 1.0280317799410006e-05, "loss": 0.648, "step": 5177 }, { "epoch": 0.36451953537486803, "grad_norm": 1.9281716346740723, "learning_rate": 1.0278907711655183e-05, "loss": 0.7628, "step": 5178 }, { "epoch": 0.3645899331221401, "grad_norm": 1.9448643922805786, "learning_rate": 1.0277497453426977e-05, "loss": 0.786, "step": 5179 }, { "epoch": 0.3646603308694122, "grad_norm": 1.7835553884506226, "learning_rate": 1.0276087024798713e-05, "loss": 0.7185, "step": 5180 }, { "epoch": 0.36473072861668426, "grad_norm": 1.6675379276275635, "learning_rate": 1.0274676425843716e-05, "loss": 0.6796, "step": 5181 }, { "epoch": 0.36480112636395634, "grad_norm": 2.119790554046631, "learning_rate": 1.0273265656635326e-05, "loss": 0.7562, "step": 5182 }, { "epoch": 0.3648715241112284, "grad_norm": 1.7027209997177124, "learning_rate": 1.027185471724689e-05, "loss": 0.6364, "step": 5183 }, { "epoch": 0.36494192185850055, "grad_norm": 1.6613198518753052, "learning_rate": 1.0270443607751764e-05, "loss": 0.6113, "step": 5184 }, { "epoch": 0.36501231960577263, "grad_norm": 1.8618322610855103, "learning_rate": 1.0269032328223312e-05, "loss": 0.6861, "step": 5185 }, { "epoch": 0.3650827173530447, "grad_norm": 1.7093168497085571, "learning_rate": 1.0267620878734912e-05, "loss": 0.832, "step": 5186 }, { "epoch": 0.3651531151003168, "grad_norm": 1.9173895120620728, "learning_rate": 1.026620925935994e-05, "loss": 0.7089, "step": 5187 }, { "epoch": 0.36522351284758886, "grad_norm": 1.7718486785888672, "learning_rate": 1.0264797470171791e-05, "loss": 0.7599, "step": 5188 }, { "epoch": 0.36529391059486094, "grad_norm": 1.7508693933486938, "learning_rate": 1.0263385511243865e-05, "loss": 0.6441, "step": 5189 }, { "epoch": 0.36536430834213307, "grad_norm": 2.0049166679382324, "learning_rate": 1.026197338264957e-05, "loss": 0.6969, "step": 5190 }, { "epoch": 0.36543470608940515, "grad_norm": 1.8473037481307983, "learning_rate": 1.0260561084462327e-05, "loss": 0.7871, "step": 5191 }, { "epoch": 0.3655051038366772, "grad_norm": 2.0289714336395264, "learning_rate": 1.025914861675556e-05, "loss": 0.7698, "step": 5192 }, { "epoch": 0.3655755015839493, "grad_norm": 1.7718275785446167, "learning_rate": 1.0257735979602703e-05, "loss": 0.7483, "step": 5193 }, { "epoch": 0.3656458993312214, "grad_norm": 1.8395198583602905, "learning_rate": 1.0256323173077202e-05, "loss": 0.7448, "step": 5194 }, { "epoch": 0.3657162970784935, "grad_norm": 1.6992026567459106, "learning_rate": 1.0254910197252511e-05, "loss": 0.598, "step": 5195 }, { "epoch": 0.3657866948257656, "grad_norm": 1.8085403442382812, "learning_rate": 1.0253497052202091e-05, "loss": 0.6794, "step": 5196 }, { "epoch": 0.36585709257303767, "grad_norm": 1.6625057458877563, "learning_rate": 1.025208373799941e-05, "loss": 0.6646, "step": 5197 }, { "epoch": 0.36592749032030975, "grad_norm": 1.7434053421020508, "learning_rate": 1.0250670254717955e-05, "loss": 0.6834, "step": 5198 }, { "epoch": 0.3659978880675818, "grad_norm": 1.778206467628479, "learning_rate": 1.0249256602431208e-05, "loss": 0.6847, "step": 5199 }, { "epoch": 0.3660682858148539, "grad_norm": 1.882431983947754, "learning_rate": 1.0247842781212665e-05, "loss": 0.6779, "step": 5200 }, { "epoch": 0.36613868356212603, "grad_norm": 1.9357483386993408, "learning_rate": 1.0246428791135838e-05, "loss": 0.6316, "step": 5201 }, { "epoch": 0.3662090813093981, "grad_norm": 1.6566604375839233, "learning_rate": 1.0245014632274236e-05, "loss": 0.6878, "step": 5202 }, { "epoch": 0.3662794790566702, "grad_norm": 1.8348414897918701, "learning_rate": 1.0243600304701383e-05, "loss": 0.7995, "step": 5203 }, { "epoch": 0.36634987680394226, "grad_norm": 1.9631662368774414, "learning_rate": 1.0242185808490814e-05, "loss": 0.6366, "step": 5204 }, { "epoch": 0.36642027455121434, "grad_norm": 1.589158296585083, "learning_rate": 1.0240771143716071e-05, "loss": 0.7802, "step": 5205 }, { "epoch": 0.3664906722984865, "grad_norm": 1.8015682697296143, "learning_rate": 1.0239356310450698e-05, "loss": 0.7792, "step": 5206 }, { "epoch": 0.36656107004575855, "grad_norm": 1.8317927122116089, "learning_rate": 1.0237941308768256e-05, "loss": 0.7143, "step": 5207 }, { "epoch": 0.36663146779303063, "grad_norm": 1.5566601753234863, "learning_rate": 1.0236526138742315e-05, "loss": 0.65, "step": 5208 }, { "epoch": 0.3667018655403027, "grad_norm": 1.8618600368499756, "learning_rate": 1.0235110800446449e-05, "loss": 0.6194, "step": 5209 }, { "epoch": 0.3667722632875748, "grad_norm": 1.72518789768219, "learning_rate": 1.0233695293954242e-05, "loss": 0.7625, "step": 5210 }, { "epoch": 0.36684266103484686, "grad_norm": 1.7208424806594849, "learning_rate": 1.0232279619339288e-05, "loss": 0.7323, "step": 5211 }, { "epoch": 0.366913058782119, "grad_norm": 1.710679531097412, "learning_rate": 1.0230863776675188e-05, "loss": 0.7231, "step": 5212 }, { "epoch": 0.36698345652939107, "grad_norm": 1.8550952672958374, "learning_rate": 1.0229447766035556e-05, "loss": 0.7839, "step": 5213 }, { "epoch": 0.36705385427666315, "grad_norm": 1.832140564918518, "learning_rate": 1.022803158749401e-05, "loss": 0.7719, "step": 5214 }, { "epoch": 0.3671242520239352, "grad_norm": 1.8216450214385986, "learning_rate": 1.0226615241124177e-05, "loss": 0.5738, "step": 5215 }, { "epoch": 0.3671946497712073, "grad_norm": 1.9276846647262573, "learning_rate": 1.0225198726999696e-05, "loss": 0.8599, "step": 5216 }, { "epoch": 0.3672650475184794, "grad_norm": 2.6223535537719727, "learning_rate": 1.0223782045194213e-05, "loss": 0.8738, "step": 5217 }, { "epoch": 0.3673354452657515, "grad_norm": 2.207710027694702, "learning_rate": 1.0222365195781381e-05, "loss": 0.6108, "step": 5218 }, { "epoch": 0.3674058430130236, "grad_norm": 1.9490889310836792, "learning_rate": 1.0220948178834864e-05, "loss": 0.6931, "step": 5219 }, { "epoch": 0.36747624076029567, "grad_norm": 1.6793593168258667, "learning_rate": 1.0219530994428335e-05, "loss": 0.7077, "step": 5220 }, { "epoch": 0.36754663850756775, "grad_norm": 1.7949142456054688, "learning_rate": 1.0218113642635473e-05, "loss": 0.7351, "step": 5221 }, { "epoch": 0.3676170362548398, "grad_norm": 1.76246178150177, "learning_rate": 1.0216696123529968e-05, "loss": 0.6655, "step": 5222 }, { "epoch": 0.36768743400211196, "grad_norm": 2.0549397468566895, "learning_rate": 1.0215278437185519e-05, "loss": 0.7076, "step": 5223 }, { "epoch": 0.36775783174938403, "grad_norm": 1.5700234174728394, "learning_rate": 1.0213860583675833e-05, "loss": 0.654, "step": 5224 }, { "epoch": 0.3678282294966561, "grad_norm": 1.653539776802063, "learning_rate": 1.0212442563074622e-05, "loss": 0.6349, "step": 5225 }, { "epoch": 0.3678986272439282, "grad_norm": 1.7631062269210815, "learning_rate": 1.0211024375455616e-05, "loss": 0.7, "step": 5226 }, { "epoch": 0.36796902499120027, "grad_norm": 1.9478917121887207, "learning_rate": 1.0209606020892541e-05, "loss": 0.7321, "step": 5227 }, { "epoch": 0.36803942273847234, "grad_norm": 1.4790852069854736, "learning_rate": 1.0208187499459145e-05, "loss": 0.6842, "step": 5228 }, { "epoch": 0.3681098204857445, "grad_norm": 1.5369781255722046, "learning_rate": 1.0206768811229176e-05, "loss": 0.6484, "step": 5229 }, { "epoch": 0.36818021823301655, "grad_norm": 2.3798553943634033, "learning_rate": 1.0205349956276388e-05, "loss": 0.7477, "step": 5230 }, { "epoch": 0.36825061598028863, "grad_norm": 1.756794810295105, "learning_rate": 1.0203930934674558e-05, "loss": 0.6446, "step": 5231 }, { "epoch": 0.3683210137275607, "grad_norm": 1.7456036806106567, "learning_rate": 1.0202511746497454e-05, "loss": 0.6442, "step": 5232 }, { "epoch": 0.3683914114748328, "grad_norm": 1.7075870037078857, "learning_rate": 1.0201092391818864e-05, "loss": 0.6546, "step": 5233 }, { "epoch": 0.3684618092221049, "grad_norm": 1.7626656293869019, "learning_rate": 1.0199672870712582e-05, "loss": 0.7262, "step": 5234 }, { "epoch": 0.368532206969377, "grad_norm": 1.7301390171051025, "learning_rate": 1.019825318325241e-05, "loss": 0.7734, "step": 5235 }, { "epoch": 0.3686026047166491, "grad_norm": 1.6568595170974731, "learning_rate": 1.0196833329512158e-05, "loss": 0.718, "step": 5236 }, { "epoch": 0.36867300246392115, "grad_norm": 2.011725902557373, "learning_rate": 1.0195413309565647e-05, "loss": 0.6779, "step": 5237 }, { "epoch": 0.36874340021119323, "grad_norm": 1.9658771753311157, "learning_rate": 1.0193993123486702e-05, "loss": 0.6609, "step": 5238 }, { "epoch": 0.3688137979584653, "grad_norm": 2.26755952835083, "learning_rate": 1.0192572771349164e-05, "loss": 0.6931, "step": 5239 }, { "epoch": 0.36888419570573744, "grad_norm": 1.761446237564087, "learning_rate": 1.0191152253226875e-05, "loss": 0.7408, "step": 5240 }, { "epoch": 0.3689545934530095, "grad_norm": 1.9308634996414185, "learning_rate": 1.018973156919369e-05, "loss": 0.691, "step": 5241 }, { "epoch": 0.3690249912002816, "grad_norm": 1.7107572555541992, "learning_rate": 1.0188310719323473e-05, "loss": 0.7825, "step": 5242 }, { "epoch": 0.36909538894755367, "grad_norm": 1.6717792749404907, "learning_rate": 1.0186889703690096e-05, "loss": 0.8142, "step": 5243 }, { "epoch": 0.36916578669482575, "grad_norm": 1.9592125415802002, "learning_rate": 1.0185468522367433e-05, "loss": 0.6331, "step": 5244 }, { "epoch": 0.3692361844420978, "grad_norm": 1.9529261589050293, "learning_rate": 1.0184047175429378e-05, "loss": 0.7019, "step": 5245 }, { "epoch": 0.36930658218936996, "grad_norm": 1.7182368040084839, "learning_rate": 1.018262566294983e-05, "loss": 0.6368, "step": 5246 }, { "epoch": 0.36937697993664204, "grad_norm": 2.1959478855133057, "learning_rate": 1.0181203985002686e-05, "loss": 0.6903, "step": 5247 }, { "epoch": 0.3694473776839141, "grad_norm": 1.734174370765686, "learning_rate": 1.0179782141661869e-05, "loss": 0.7032, "step": 5248 }, { "epoch": 0.3695177754311862, "grad_norm": 1.7132996320724487, "learning_rate": 1.01783601330013e-05, "loss": 0.7647, "step": 5249 }, { "epoch": 0.36958817317845827, "grad_norm": 1.6879916191101074, "learning_rate": 1.0176937959094907e-05, "loss": 0.7457, "step": 5250 }, { "epoch": 0.3696585709257304, "grad_norm": 2.059248924255371, "learning_rate": 1.017551562001663e-05, "loss": 0.7988, "step": 5251 }, { "epoch": 0.3697289686730025, "grad_norm": 1.9319462776184082, "learning_rate": 1.0174093115840425e-05, "loss": 0.681, "step": 5252 }, { "epoch": 0.36979936642027456, "grad_norm": 1.6000701189041138, "learning_rate": 1.017267044664024e-05, "loss": 0.6896, "step": 5253 }, { "epoch": 0.36986976416754663, "grad_norm": 2.0502054691314697, "learning_rate": 1.0171247612490048e-05, "loss": 0.739, "step": 5254 }, { "epoch": 0.3699401619148187, "grad_norm": 1.637516975402832, "learning_rate": 1.0169824613463818e-05, "loss": 0.6366, "step": 5255 }, { "epoch": 0.3700105596620908, "grad_norm": 1.712862491607666, "learning_rate": 1.0168401449635538e-05, "loss": 0.6676, "step": 5256 }, { "epoch": 0.3700809574093629, "grad_norm": 1.8947783708572388, "learning_rate": 1.0166978121079197e-05, "loss": 0.7429, "step": 5257 }, { "epoch": 0.370151355156635, "grad_norm": 1.708625316619873, "learning_rate": 1.0165554627868794e-05, "loss": 0.7112, "step": 5258 }, { "epoch": 0.3702217529039071, "grad_norm": 1.9722927808761597, "learning_rate": 1.016413097007834e-05, "loss": 0.7378, "step": 5259 }, { "epoch": 0.37029215065117915, "grad_norm": 1.6570886373519897, "learning_rate": 1.0162707147781851e-05, "loss": 0.7048, "step": 5260 }, { "epoch": 0.37036254839845123, "grad_norm": 1.7932097911834717, "learning_rate": 1.0161283161053353e-05, "loss": 0.7906, "step": 5261 }, { "epoch": 0.37043294614572336, "grad_norm": 2.1763179302215576, "learning_rate": 1.015985900996688e-05, "loss": 0.6945, "step": 5262 }, { "epoch": 0.37050334389299544, "grad_norm": 2.1959125995635986, "learning_rate": 1.0158434694596475e-05, "loss": 0.5949, "step": 5263 }, { "epoch": 0.3705737416402675, "grad_norm": 1.6461431980133057, "learning_rate": 1.0157010215016192e-05, "loss": 0.559, "step": 5264 }, { "epoch": 0.3706441393875396, "grad_norm": 1.8532779216766357, "learning_rate": 1.0155585571300086e-05, "loss": 0.7305, "step": 5265 }, { "epoch": 0.3707145371348117, "grad_norm": 1.6319704055786133, "learning_rate": 1.015416076352223e-05, "loss": 0.7101, "step": 5266 }, { "epoch": 0.37078493488208375, "grad_norm": 1.7893900871276855, "learning_rate": 1.0152735791756698e-05, "loss": 0.7462, "step": 5267 }, { "epoch": 0.3708553326293559, "grad_norm": 1.6483327150344849, "learning_rate": 1.0151310656077578e-05, "loss": 0.8017, "step": 5268 }, { "epoch": 0.37092573037662796, "grad_norm": 1.6239169836044312, "learning_rate": 1.0149885356558963e-05, "loss": 0.6487, "step": 5269 }, { "epoch": 0.37099612812390004, "grad_norm": 2.3386011123657227, "learning_rate": 1.0148459893274955e-05, "loss": 0.7671, "step": 5270 }, { "epoch": 0.3710665258711721, "grad_norm": 1.7649767398834229, "learning_rate": 1.0147034266299667e-05, "loss": 0.7562, "step": 5271 }, { "epoch": 0.3711369236184442, "grad_norm": 1.7903693914413452, "learning_rate": 1.0145608475707217e-05, "loss": 0.7502, "step": 5272 }, { "epoch": 0.37120732136571627, "grad_norm": 1.5871249437332153, "learning_rate": 1.0144182521571733e-05, "loss": 0.6172, "step": 5273 }, { "epoch": 0.3712777191129884, "grad_norm": 1.7630242109298706, "learning_rate": 1.0142756403967351e-05, "loss": 0.7473, "step": 5274 }, { "epoch": 0.3713481168602605, "grad_norm": 1.535504937171936, "learning_rate": 1.014133012296822e-05, "loss": 0.8685, "step": 5275 }, { "epoch": 0.37141851460753256, "grad_norm": 1.5895111560821533, "learning_rate": 1.0139903678648489e-05, "loss": 0.6186, "step": 5276 }, { "epoch": 0.37148891235480463, "grad_norm": 2.7040889263153076, "learning_rate": 1.0138477071082322e-05, "loss": 0.643, "step": 5277 }, { "epoch": 0.3715593101020767, "grad_norm": 24.40300750732422, "learning_rate": 1.0137050300343892e-05, "loss": 0.7029, "step": 5278 }, { "epoch": 0.37162970784934884, "grad_norm": 1.6729038953781128, "learning_rate": 1.0135623366507373e-05, "loss": 0.6739, "step": 5279 }, { "epoch": 0.3717001055966209, "grad_norm": 1.7552975416183472, "learning_rate": 1.0134196269646955e-05, "loss": 0.6781, "step": 5280 }, { "epoch": 0.371770503343893, "grad_norm": 1.9739738702774048, "learning_rate": 1.0132769009836835e-05, "loss": 0.7175, "step": 5281 }, { "epoch": 0.3718409010911651, "grad_norm": 2.1036131381988525, "learning_rate": 1.013134158715122e-05, "loss": 0.6868, "step": 5282 }, { "epoch": 0.37191129883843715, "grad_norm": 1.7601113319396973, "learning_rate": 1.0129914001664317e-05, "loss": 0.6785, "step": 5283 }, { "epoch": 0.37198169658570923, "grad_norm": 1.7540339231491089, "learning_rate": 1.012848625345035e-05, "loss": 0.6684, "step": 5284 }, { "epoch": 0.37205209433298136, "grad_norm": 1.7120615243911743, "learning_rate": 1.0127058342583551e-05, "loss": 0.8378, "step": 5285 }, { "epoch": 0.37212249208025344, "grad_norm": 2.675217866897583, "learning_rate": 1.0125630269138155e-05, "loss": 0.6893, "step": 5286 }, { "epoch": 0.3721928898275255, "grad_norm": 2.2433011531829834, "learning_rate": 1.0124202033188414e-05, "loss": 0.7734, "step": 5287 }, { "epoch": 0.3722632875747976, "grad_norm": 1.8474833965301514, "learning_rate": 1.0122773634808578e-05, "loss": 0.7053, "step": 5288 }, { "epoch": 0.3723336853220697, "grad_norm": 1.9858371019363403, "learning_rate": 1.0121345074072912e-05, "loss": 0.7986, "step": 5289 }, { "epoch": 0.3724040830693418, "grad_norm": 1.6768808364868164, "learning_rate": 1.0119916351055688e-05, "loss": 0.6783, "step": 5290 }, { "epoch": 0.3724744808166139, "grad_norm": 1.674741268157959, "learning_rate": 1.011848746583119e-05, "loss": 0.7301, "step": 5291 }, { "epoch": 0.37254487856388596, "grad_norm": 1.7185328006744385, "learning_rate": 1.0117058418473702e-05, "loss": 0.6655, "step": 5292 }, { "epoch": 0.37261527631115804, "grad_norm": 1.9729162454605103, "learning_rate": 1.0115629209057524e-05, "loss": 0.7784, "step": 5293 }, { "epoch": 0.3726856740584301, "grad_norm": 1.8761993646621704, "learning_rate": 1.0114199837656963e-05, "loss": 0.7297, "step": 5294 }, { "epoch": 0.3727560718057022, "grad_norm": 2.0601983070373535, "learning_rate": 1.0112770304346331e-05, "loss": 0.8252, "step": 5295 }, { "epoch": 0.3728264695529743, "grad_norm": 1.5327900648117065, "learning_rate": 1.0111340609199952e-05, "loss": 0.6716, "step": 5296 }, { "epoch": 0.3728968673002464, "grad_norm": 1.675683617591858, "learning_rate": 1.0109910752292158e-05, "loss": 0.6941, "step": 5297 }, { "epoch": 0.3729672650475185, "grad_norm": 1.7194623947143555, "learning_rate": 1.0108480733697286e-05, "loss": 0.7686, "step": 5298 }, { "epoch": 0.37303766279479056, "grad_norm": 3.103590965270996, "learning_rate": 1.0107050553489687e-05, "loss": 0.8156, "step": 5299 }, { "epoch": 0.37310806054206264, "grad_norm": 1.8955953121185303, "learning_rate": 1.0105620211743714e-05, "loss": 0.782, "step": 5300 }, { "epoch": 0.37317845828933477, "grad_norm": 2.878431558609009, "learning_rate": 1.0104189708533735e-05, "loss": 0.6781, "step": 5301 }, { "epoch": 0.37324885603660685, "grad_norm": 2.2636687755584717, "learning_rate": 1.010275904393412e-05, "loss": 0.7647, "step": 5302 }, { "epoch": 0.3733192537838789, "grad_norm": 1.7787522077560425, "learning_rate": 1.0101328218019255e-05, "loss": 0.6116, "step": 5303 }, { "epoch": 0.373389651531151, "grad_norm": 1.9748866558074951, "learning_rate": 1.0099897230863525e-05, "loss": 0.7418, "step": 5304 }, { "epoch": 0.3734600492784231, "grad_norm": 1.9209239482879639, "learning_rate": 1.009846608254133e-05, "loss": 0.7749, "step": 5305 }, { "epoch": 0.37353044702569516, "grad_norm": 1.8696320056915283, "learning_rate": 1.0097034773127081e-05, "loss": 0.7943, "step": 5306 }, { "epoch": 0.3736008447729673, "grad_norm": 1.5559605360031128, "learning_rate": 1.0095603302695186e-05, "loss": 0.6976, "step": 5307 }, { "epoch": 0.37367124252023937, "grad_norm": 1.7159825563430786, "learning_rate": 1.0094171671320072e-05, "loss": 0.812, "step": 5308 }, { "epoch": 0.37374164026751144, "grad_norm": 2.1060099601745605, "learning_rate": 1.009273987907617e-05, "loss": 0.6767, "step": 5309 }, { "epoch": 0.3738120380147835, "grad_norm": 1.713591456413269, "learning_rate": 1.0091307926037923e-05, "loss": 0.7183, "step": 5310 }, { "epoch": 0.3738824357620556, "grad_norm": 1.6451973915100098, "learning_rate": 1.0089875812279776e-05, "loss": 0.8686, "step": 5311 }, { "epoch": 0.3739528335093277, "grad_norm": 2.074495553970337, "learning_rate": 1.0088443537876187e-05, "loss": 0.6862, "step": 5312 }, { "epoch": 0.3740232312565998, "grad_norm": 1.9609813690185547, "learning_rate": 1.0087011102901623e-05, "loss": 0.7647, "step": 5313 }, { "epoch": 0.3740936290038719, "grad_norm": 2.0789029598236084, "learning_rate": 1.0085578507430552e-05, "loss": 0.8042, "step": 5314 }, { "epoch": 0.37416402675114396, "grad_norm": 1.8454471826553345, "learning_rate": 1.0084145751537462e-05, "loss": 0.7691, "step": 5315 }, { "epoch": 0.37423442449841604, "grad_norm": 1.7699419260025024, "learning_rate": 1.0082712835296844e-05, "loss": 0.7042, "step": 5316 }, { "epoch": 0.3743048222456881, "grad_norm": 1.9046812057495117, "learning_rate": 1.0081279758783192e-05, "loss": 0.7076, "step": 5317 }, { "epoch": 0.37437521999296025, "grad_norm": 1.7194609642028809, "learning_rate": 1.0079846522071014e-05, "loss": 0.5975, "step": 5318 }, { "epoch": 0.37444561774023233, "grad_norm": 1.7808161973953247, "learning_rate": 1.0078413125234828e-05, "loss": 0.7619, "step": 5319 }, { "epoch": 0.3745160154875044, "grad_norm": 2.939826488494873, "learning_rate": 1.0076979568349153e-05, "loss": 0.6928, "step": 5320 }, { "epoch": 0.3745864132347765, "grad_norm": 1.5947803258895874, "learning_rate": 1.0075545851488524e-05, "loss": 0.6526, "step": 5321 }, { "epoch": 0.37465681098204856, "grad_norm": 1.8662793636322021, "learning_rate": 1.0074111974727483e-05, "loss": 0.7929, "step": 5322 }, { "epoch": 0.37472720872932064, "grad_norm": 2.086256742477417, "learning_rate": 1.0072677938140573e-05, "loss": 0.662, "step": 5323 }, { "epoch": 0.37479760647659277, "grad_norm": 2.270972967147827, "learning_rate": 1.0071243741802357e-05, "loss": 0.6333, "step": 5324 }, { "epoch": 0.37486800422386485, "grad_norm": 1.7108832597732544, "learning_rate": 1.0069809385787396e-05, "loss": 0.7102, "step": 5325 }, { "epoch": 0.3749384019711369, "grad_norm": 1.854067087173462, "learning_rate": 1.0068374870170266e-05, "loss": 0.6783, "step": 5326 }, { "epoch": 0.375008799718409, "grad_norm": 1.9847677946090698, "learning_rate": 1.0066940195025546e-05, "loss": 0.6757, "step": 5327 }, { "epoch": 0.3750791974656811, "grad_norm": 1.6450245380401611, "learning_rate": 1.0065505360427829e-05, "loss": 0.7073, "step": 5328 }, { "epoch": 0.3751495952129532, "grad_norm": 1.7886626720428467, "learning_rate": 1.006407036645171e-05, "loss": 0.7768, "step": 5329 }, { "epoch": 0.3752199929602253, "grad_norm": 2.33428692817688, "learning_rate": 1.0062635213171801e-05, "loss": 0.6533, "step": 5330 }, { "epoch": 0.37529039070749737, "grad_norm": 1.6418706178665161, "learning_rate": 1.006119990066271e-05, "loss": 0.7096, "step": 5331 }, { "epoch": 0.37536078845476945, "grad_norm": 1.7161122560501099, "learning_rate": 1.0059764428999065e-05, "loss": 0.7008, "step": 5332 }, { "epoch": 0.3754311862020415, "grad_norm": 1.880574345588684, "learning_rate": 1.0058328798255498e-05, "loss": 0.6446, "step": 5333 }, { "epoch": 0.3755015839493136, "grad_norm": 1.982589602470398, "learning_rate": 1.0056893008506645e-05, "loss": 0.6781, "step": 5334 }, { "epoch": 0.37557198169658573, "grad_norm": 1.8677778244018555, "learning_rate": 1.0055457059827156e-05, "loss": 0.6995, "step": 5335 }, { "epoch": 0.3756423794438578, "grad_norm": 2.078749656677246, "learning_rate": 1.005402095229169e-05, "loss": 0.7679, "step": 5336 }, { "epoch": 0.3757127771911299, "grad_norm": 2.18495774269104, "learning_rate": 1.0052584685974906e-05, "loss": 0.6782, "step": 5337 }, { "epoch": 0.37578317493840196, "grad_norm": 2.020418643951416, "learning_rate": 1.005114826095148e-05, "loss": 0.7499, "step": 5338 }, { "epoch": 0.37585357268567404, "grad_norm": 1.8028045892715454, "learning_rate": 1.0049711677296094e-05, "loss": 0.606, "step": 5339 }, { "epoch": 0.3759239704329461, "grad_norm": 2.082176446914673, "learning_rate": 1.0048274935083436e-05, "loss": 0.6847, "step": 5340 }, { "epoch": 0.37599436818021825, "grad_norm": 1.987864375114441, "learning_rate": 1.0046838034388202e-05, "loss": 0.7121, "step": 5341 }, { "epoch": 0.37606476592749033, "grad_norm": 2.2933692932128906, "learning_rate": 1.0045400975285101e-05, "loss": 0.7778, "step": 5342 }, { "epoch": 0.3761351636747624, "grad_norm": 1.778998613357544, "learning_rate": 1.0043963757848843e-05, "loss": 0.6238, "step": 5343 }, { "epoch": 0.3762055614220345, "grad_norm": 1.8387465476989746, "learning_rate": 1.0042526382154153e-05, "loss": 0.7837, "step": 5344 }, { "epoch": 0.37627595916930656, "grad_norm": 1.5317102670669556, "learning_rate": 1.0041088848275761e-05, "loss": 0.6937, "step": 5345 }, { "epoch": 0.3763463569165787, "grad_norm": 1.8663336038589478, "learning_rate": 1.0039651156288406e-05, "loss": 0.7749, "step": 5346 }, { "epoch": 0.37641675466385077, "grad_norm": 1.7656573057174683, "learning_rate": 1.0038213306266835e-05, "loss": 0.6131, "step": 5347 }, { "epoch": 0.37648715241112285, "grad_norm": 1.5636613368988037, "learning_rate": 1.0036775298285802e-05, "loss": 0.6285, "step": 5348 }, { "epoch": 0.3765575501583949, "grad_norm": 1.8393199443817139, "learning_rate": 1.003533713242007e-05, "loss": 0.6768, "step": 5349 }, { "epoch": 0.376627947905667, "grad_norm": 3.3449630737304688, "learning_rate": 1.0033898808744412e-05, "loss": 0.7329, "step": 5350 }, { "epoch": 0.3766983456529391, "grad_norm": 1.92350172996521, "learning_rate": 1.003246032733361e-05, "loss": 0.6818, "step": 5351 }, { "epoch": 0.3767687434002112, "grad_norm": 1.853430151939392, "learning_rate": 1.0031021688262447e-05, "loss": 0.7002, "step": 5352 }, { "epoch": 0.3768391411474833, "grad_norm": 1.7913384437561035, "learning_rate": 1.002958289160572e-05, "loss": 0.6596, "step": 5353 }, { "epoch": 0.37690953889475537, "grad_norm": 1.778450608253479, "learning_rate": 1.0028143937438236e-05, "loss": 0.7821, "step": 5354 }, { "epoch": 0.37697993664202745, "grad_norm": 1.6053636074066162, "learning_rate": 1.0026704825834807e-05, "loss": 0.7137, "step": 5355 }, { "epoch": 0.3770503343892995, "grad_norm": 1.9291224479675293, "learning_rate": 1.0025265556870252e-05, "loss": 0.7205, "step": 5356 }, { "epoch": 0.37712073213657166, "grad_norm": 1.9443745613098145, "learning_rate": 1.0023826130619402e-05, "loss": 0.7322, "step": 5357 }, { "epoch": 0.37719112988384373, "grad_norm": 1.6640493869781494, "learning_rate": 1.0022386547157091e-05, "loss": 0.7164, "step": 5358 }, { "epoch": 0.3772615276311158, "grad_norm": 1.9136216640472412, "learning_rate": 1.0020946806558167e-05, "loss": 0.6796, "step": 5359 }, { "epoch": 0.3773319253783879, "grad_norm": 1.715802550315857, "learning_rate": 1.0019506908897484e-05, "loss": 0.7418, "step": 5360 }, { "epoch": 0.37740232312565997, "grad_norm": 1.9123260974884033, "learning_rate": 1.00180668542499e-05, "loss": 0.8662, "step": 5361 }, { "epoch": 0.37747272087293204, "grad_norm": 1.7322694063186646, "learning_rate": 1.0016626642690288e-05, "loss": 0.5995, "step": 5362 }, { "epoch": 0.3775431186202042, "grad_norm": 1.8171782493591309, "learning_rate": 1.0015186274293524e-05, "loss": 0.7169, "step": 5363 }, { "epoch": 0.37761351636747625, "grad_norm": 1.7371406555175781, "learning_rate": 1.0013745749134499e-05, "loss": 0.8328, "step": 5364 }, { "epoch": 0.37768391411474833, "grad_norm": 2.034255027770996, "learning_rate": 1.0012305067288098e-05, "loss": 0.676, "step": 5365 }, { "epoch": 0.3777543118620204, "grad_norm": 1.8474699258804321, "learning_rate": 1.001086422882923e-05, "loss": 0.7084, "step": 5366 }, { "epoch": 0.3778247096092925, "grad_norm": 1.5765087604522705, "learning_rate": 1.0009423233832804e-05, "loss": 0.6624, "step": 5367 }, { "epoch": 0.37789510735656456, "grad_norm": 1.6165974140167236, "learning_rate": 1.000798208237374e-05, "loss": 0.5427, "step": 5368 }, { "epoch": 0.3779655051038367, "grad_norm": 2.0014312267303467, "learning_rate": 1.0006540774526962e-05, "loss": 0.7155, "step": 5369 }, { "epoch": 0.3780359028511088, "grad_norm": 1.6871329545974731, "learning_rate": 1.0005099310367406e-05, "loss": 0.6796, "step": 5370 }, { "epoch": 0.37810630059838085, "grad_norm": 1.672567367553711, "learning_rate": 1.0003657689970016e-05, "loss": 0.7596, "step": 5371 }, { "epoch": 0.37817669834565293, "grad_norm": 1.5686231851577759, "learning_rate": 1.0002215913409742e-05, "loss": 0.6148, "step": 5372 }, { "epoch": 0.378247096092925, "grad_norm": 1.7239083051681519, "learning_rate": 1.0000773980761544e-05, "loss": 0.784, "step": 5373 }, { "epoch": 0.37831749384019714, "grad_norm": 2.0698776245117188, "learning_rate": 9.999331892100388e-06, "loss": 0.7348, "step": 5374 }, { "epoch": 0.3783878915874692, "grad_norm": 2.0022168159484863, "learning_rate": 9.997889647501251e-06, "loss": 0.7668, "step": 5375 }, { "epoch": 0.3784582893347413, "grad_norm": 1.8236323595046997, "learning_rate": 9.996447247039119e-06, "loss": 0.7121, "step": 5376 }, { "epoch": 0.37852868708201337, "grad_norm": 2.380829334259033, "learning_rate": 9.995004690788976e-06, "loss": 0.7142, "step": 5377 }, { "epoch": 0.37859908482928545, "grad_norm": 1.804495096206665, "learning_rate": 9.99356197882583e-06, "loss": 0.7939, "step": 5378 }, { "epoch": 0.3786694825765575, "grad_norm": 1.7329933643341064, "learning_rate": 9.992119111224685e-06, "loss": 0.7004, "step": 5379 }, { "epoch": 0.37873988032382966, "grad_norm": 1.650241732597351, "learning_rate": 9.990676088060557e-06, "loss": 0.7129, "step": 5380 }, { "epoch": 0.37881027807110174, "grad_norm": 1.6394497156143188, "learning_rate": 9.98923290940847e-06, "loss": 0.609, "step": 5381 }, { "epoch": 0.3788806758183738, "grad_norm": 1.8784000873565674, "learning_rate": 9.987789575343458e-06, "loss": 0.7496, "step": 5382 }, { "epoch": 0.3789510735656459, "grad_norm": 1.9606164693832397, "learning_rate": 9.98634608594056e-06, "loss": 0.7107, "step": 5383 }, { "epoch": 0.37902147131291797, "grad_norm": 1.9121488332748413, "learning_rate": 9.984902441274825e-06, "loss": 0.727, "step": 5384 }, { "epoch": 0.3790918690601901, "grad_norm": 1.9320366382598877, "learning_rate": 9.983458641421307e-06, "loss": 0.6247, "step": 5385 }, { "epoch": 0.3791622668074622, "grad_norm": 1.532025694847107, "learning_rate": 9.982014686455074e-06, "loss": 0.6936, "step": 5386 }, { "epoch": 0.37923266455473426, "grad_norm": 1.542554497718811, "learning_rate": 9.980570576451196e-06, "loss": 0.6672, "step": 5387 }, { "epoch": 0.37930306230200633, "grad_norm": 2.0299131870269775, "learning_rate": 9.979126311484754e-06, "loss": 0.7724, "step": 5388 }, { "epoch": 0.3793734600492784, "grad_norm": 1.676446795463562, "learning_rate": 9.977681891630837e-06, "loss": 0.6177, "step": 5389 }, { "epoch": 0.3794438577965505, "grad_norm": 2.5348141193389893, "learning_rate": 9.976237316964544e-06, "loss": 0.7803, "step": 5390 }, { "epoch": 0.3795142555438226, "grad_norm": 3.3527660369873047, "learning_rate": 9.974792587560975e-06, "loss": 0.717, "step": 5391 }, { "epoch": 0.3795846532910947, "grad_norm": 1.6299231052398682, "learning_rate": 9.973347703495246e-06, "loss": 0.6553, "step": 5392 }, { "epoch": 0.3796550510383668, "grad_norm": 1.875382661819458, "learning_rate": 9.971902664842478e-06, "loss": 0.5663, "step": 5393 }, { "epoch": 0.37972544878563885, "grad_norm": 1.5284674167633057, "learning_rate": 9.970457471677796e-06, "loss": 0.6985, "step": 5394 }, { "epoch": 0.37979584653291093, "grad_norm": 1.6339986324310303, "learning_rate": 9.969012124076342e-06, "loss": 0.7223, "step": 5395 }, { "epoch": 0.379866244280183, "grad_norm": 1.856506109237671, "learning_rate": 9.96756662211326e-06, "loss": 0.6104, "step": 5396 }, { "epoch": 0.37993664202745514, "grad_norm": 2.1086597442626953, "learning_rate": 9.966120965863698e-06, "loss": 0.63, "step": 5397 }, { "epoch": 0.3800070397747272, "grad_norm": 2.0636184215545654, "learning_rate": 9.964675155402824e-06, "loss": 0.6404, "step": 5398 }, { "epoch": 0.3800774375219993, "grad_norm": 2.107306718826294, "learning_rate": 9.963229190805804e-06, "loss": 0.6999, "step": 5399 }, { "epoch": 0.3801478352692714, "grad_norm": 1.6406822204589844, "learning_rate": 9.961783072147814e-06, "loss": 0.7408, "step": 5400 }, { "epoch": 0.38021823301654345, "grad_norm": 1.8856507539749146, "learning_rate": 9.960336799504037e-06, "loss": 0.8238, "step": 5401 }, { "epoch": 0.3802886307638156, "grad_norm": 1.8934967517852783, "learning_rate": 9.958890372949672e-06, "loss": 0.6838, "step": 5402 }, { "epoch": 0.38035902851108766, "grad_norm": 2.0962276458740234, "learning_rate": 9.957443792559914e-06, "loss": 0.7578, "step": 5403 }, { "epoch": 0.38042942625835974, "grad_norm": 2.079969644546509, "learning_rate": 9.955997058409977e-06, "loss": 0.7743, "step": 5404 }, { "epoch": 0.3804998240056318, "grad_norm": 1.7707676887512207, "learning_rate": 9.954550170575076e-06, "loss": 0.7214, "step": 5405 }, { "epoch": 0.3805702217529039, "grad_norm": 1.832004189491272, "learning_rate": 9.953103129130435e-06, "loss": 0.7296, "step": 5406 }, { "epoch": 0.38064061950017597, "grad_norm": 1.7930132150650024, "learning_rate": 9.951655934151287e-06, "loss": 0.6398, "step": 5407 }, { "epoch": 0.3807110172474481, "grad_norm": 1.7455673217773438, "learning_rate": 9.950208585712877e-06, "loss": 0.6706, "step": 5408 }, { "epoch": 0.3807814149947202, "grad_norm": 1.6153088808059692, "learning_rate": 9.948761083890448e-06, "loss": 0.7203, "step": 5409 }, { "epoch": 0.38085181274199226, "grad_norm": 1.8846722841262817, "learning_rate": 9.94731342875926e-06, "loss": 0.705, "step": 5410 }, { "epoch": 0.38092221048926433, "grad_norm": 1.7720093727111816, "learning_rate": 9.945865620394578e-06, "loss": 0.6559, "step": 5411 }, { "epoch": 0.3809926082365364, "grad_norm": 2.1405930519104004, "learning_rate": 9.944417658871673e-06, "loss": 0.7616, "step": 5412 }, { "epoch": 0.38106300598380854, "grad_norm": 1.9164962768554688, "learning_rate": 9.94296954426583e-06, "loss": 0.7202, "step": 5413 }, { "epoch": 0.3811334037310806, "grad_norm": 2.0604851245880127, "learning_rate": 9.941521276652333e-06, "loss": 0.689, "step": 5414 }, { "epoch": 0.3812038014783527, "grad_norm": 1.652294397354126, "learning_rate": 9.940072856106483e-06, "loss": 0.6988, "step": 5415 }, { "epoch": 0.3812741992256248, "grad_norm": 1.905492901802063, "learning_rate": 9.93862428270358e-06, "loss": 0.7544, "step": 5416 }, { "epoch": 0.38134459697289685, "grad_norm": 1.8604322671890259, "learning_rate": 9.937175556518939e-06, "loss": 0.7726, "step": 5417 }, { "epoch": 0.38141499472016893, "grad_norm": 2.2112197875976562, "learning_rate": 9.935726677627882e-06, "loss": 0.717, "step": 5418 }, { "epoch": 0.38148539246744106, "grad_norm": 1.941248893737793, "learning_rate": 9.934277646105737e-06, "loss": 0.7066, "step": 5419 }, { "epoch": 0.38155579021471314, "grad_norm": 2.2982091903686523, "learning_rate": 9.932828462027837e-06, "loss": 0.7631, "step": 5420 }, { "epoch": 0.3816261879619852, "grad_norm": 1.7509204149246216, "learning_rate": 9.931379125469532e-06, "loss": 0.6311, "step": 5421 }, { "epoch": 0.3816965857092573, "grad_norm": 1.9317138195037842, "learning_rate": 9.929929636506172e-06, "loss": 0.7436, "step": 5422 }, { "epoch": 0.3817669834565294, "grad_norm": 1.920300006866455, "learning_rate": 9.928479995213114e-06, "loss": 0.7388, "step": 5423 }, { "epoch": 0.38183738120380145, "grad_norm": 1.7973048686981201, "learning_rate": 9.927030201665731e-06, "loss": 0.733, "step": 5424 }, { "epoch": 0.3819077789510736, "grad_norm": 2.1074042320251465, "learning_rate": 9.925580255939395e-06, "loss": 0.6679, "step": 5425 }, { "epoch": 0.38197817669834566, "grad_norm": 1.690592885017395, "learning_rate": 9.924130158109493e-06, "loss": 0.5862, "step": 5426 }, { "epoch": 0.38204857444561774, "grad_norm": 1.746494174003601, "learning_rate": 9.922679908251417e-06, "loss": 0.6653, "step": 5427 }, { "epoch": 0.3821189721928898, "grad_norm": 2.034618377685547, "learning_rate": 9.921229506440564e-06, "loss": 0.7269, "step": 5428 }, { "epoch": 0.3821893699401619, "grad_norm": 2.545729637145996, "learning_rate": 9.919778952752343e-06, "loss": 0.6695, "step": 5429 }, { "epoch": 0.382259767687434, "grad_norm": 1.4270471334457397, "learning_rate": 9.918328247262171e-06, "loss": 0.5903, "step": 5430 }, { "epoch": 0.3823301654347061, "grad_norm": 1.7483727931976318, "learning_rate": 9.916877390045469e-06, "loss": 0.7069, "step": 5431 }, { "epoch": 0.3824005631819782, "grad_norm": 1.7942904233932495, "learning_rate": 9.915426381177672e-06, "loss": 0.6487, "step": 5432 }, { "epoch": 0.38247096092925026, "grad_norm": 1.7310653924942017, "learning_rate": 9.913975220734215e-06, "loss": 0.7562, "step": 5433 }, { "epoch": 0.38254135867652234, "grad_norm": 1.6719640493392944, "learning_rate": 9.91252390879055e-06, "loss": 0.7727, "step": 5434 }, { "epoch": 0.3826117564237944, "grad_norm": 2.5377418994903564, "learning_rate": 9.911072445422125e-06, "loss": 0.7976, "step": 5435 }, { "epoch": 0.38268215417106655, "grad_norm": 1.7806369066238403, "learning_rate": 9.90962083070441e-06, "loss": 0.6842, "step": 5436 }, { "epoch": 0.3827525519183386, "grad_norm": 1.8802837133407593, "learning_rate": 9.908169064712873e-06, "loss": 0.7328, "step": 5437 }, { "epoch": 0.3828229496656107, "grad_norm": 1.6242834329605103, "learning_rate": 9.906717147522993e-06, "loss": 0.6548, "step": 5438 }, { "epoch": 0.3828933474128828, "grad_norm": 1.5010706186294556, "learning_rate": 9.905265079210255e-06, "loss": 0.6541, "step": 5439 }, { "epoch": 0.38296374516015486, "grad_norm": 1.7151145935058594, "learning_rate": 9.903812859850154e-06, "loss": 0.6173, "step": 5440 }, { "epoch": 0.383034142907427, "grad_norm": 1.733075499534607, "learning_rate": 9.902360489518195e-06, "loss": 0.7025, "step": 5441 }, { "epoch": 0.38310454065469907, "grad_norm": 2.04484224319458, "learning_rate": 9.900907968289882e-06, "loss": 0.631, "step": 5442 }, { "epoch": 0.38317493840197114, "grad_norm": 1.658518671989441, "learning_rate": 9.899455296240738e-06, "loss": 0.7258, "step": 5443 }, { "epoch": 0.3832453361492432, "grad_norm": 1.773139476776123, "learning_rate": 9.898002473446289e-06, "loss": 0.7011, "step": 5444 }, { "epoch": 0.3833157338965153, "grad_norm": 2.8763339519500732, "learning_rate": 9.896549499982063e-06, "loss": 0.6225, "step": 5445 }, { "epoch": 0.3833861316437874, "grad_norm": 1.49462890625, "learning_rate": 9.895096375923607e-06, "loss": 0.7976, "step": 5446 }, { "epoch": 0.3834565293910595, "grad_norm": 1.7840460538864136, "learning_rate": 9.893643101346471e-06, "loss": 0.847, "step": 5447 }, { "epoch": 0.3835269271383316, "grad_norm": 2.023862361907959, "learning_rate": 9.892189676326206e-06, "loss": 0.8902, "step": 5448 }, { "epoch": 0.38359732488560366, "grad_norm": 1.815556526184082, "learning_rate": 9.890736100938382e-06, "loss": 0.7474, "step": 5449 }, { "epoch": 0.38366772263287574, "grad_norm": 1.677909016609192, "learning_rate": 9.889282375258572e-06, "loss": 0.7014, "step": 5450 }, { "epoch": 0.3837381203801478, "grad_norm": 1.6628671884536743, "learning_rate": 9.88782849936235e-06, "loss": 0.697, "step": 5451 }, { "epoch": 0.3838085181274199, "grad_norm": 1.5931270122528076, "learning_rate": 9.886374473325312e-06, "loss": 0.64, "step": 5452 }, { "epoch": 0.38387891587469203, "grad_norm": 1.5688481330871582, "learning_rate": 9.88492029722305e-06, "loss": 0.6776, "step": 5453 }, { "epoch": 0.3839493136219641, "grad_norm": 1.6977131366729736, "learning_rate": 9.88346597113117e-06, "loss": 0.7816, "step": 5454 }, { "epoch": 0.3840197113692362, "grad_norm": 1.942418098449707, "learning_rate": 9.882011495125282e-06, "loss": 0.682, "step": 5455 }, { "epoch": 0.38409010911650826, "grad_norm": 2.0211074352264404, "learning_rate": 9.880556869281007e-06, "loss": 0.7102, "step": 5456 }, { "epoch": 0.38416050686378034, "grad_norm": 2.2531683444976807, "learning_rate": 9.879102093673972e-06, "loss": 0.689, "step": 5457 }, { "epoch": 0.38423090461105247, "grad_norm": 1.9771634340286255, "learning_rate": 9.87764716837981e-06, "loss": 0.6562, "step": 5458 }, { "epoch": 0.38430130235832455, "grad_norm": 2.429993152618408, "learning_rate": 9.876192093474167e-06, "loss": 0.688, "step": 5459 }, { "epoch": 0.3843717001055966, "grad_norm": 2.0451362133026123, "learning_rate": 9.87473686903269e-06, "loss": 0.7217, "step": 5460 }, { "epoch": 0.3844420978528687, "grad_norm": 1.6083104610443115, "learning_rate": 9.873281495131041e-06, "loss": 0.6862, "step": 5461 }, { "epoch": 0.3845124956001408, "grad_norm": 2.018548011779785, "learning_rate": 9.871825971844886e-06, "loss": 0.7826, "step": 5462 }, { "epoch": 0.38458289334741286, "grad_norm": 1.798581600189209, "learning_rate": 9.870370299249897e-06, "loss": 0.6343, "step": 5463 }, { "epoch": 0.384653291094685, "grad_norm": 1.7351197004318237, "learning_rate": 9.868914477421757e-06, "loss": 0.6723, "step": 5464 }, { "epoch": 0.38472368884195707, "grad_norm": 1.4689069986343384, "learning_rate": 9.867458506436156e-06, "loss": 0.6523, "step": 5465 }, { "epoch": 0.38479408658922915, "grad_norm": 1.7129102945327759, "learning_rate": 9.866002386368787e-06, "loss": 0.8188, "step": 5466 }, { "epoch": 0.3848644843365012, "grad_norm": 2.063417911529541, "learning_rate": 9.86454611729536e-06, "loss": 0.8082, "step": 5467 }, { "epoch": 0.3849348820837733, "grad_norm": 1.6520217657089233, "learning_rate": 9.863089699291587e-06, "loss": 0.7352, "step": 5468 }, { "epoch": 0.38500527983104543, "grad_norm": 2.491246223449707, "learning_rate": 9.861633132433189e-06, "loss": 0.682, "step": 5469 }, { "epoch": 0.3850756775783175, "grad_norm": 1.4545092582702637, "learning_rate": 9.860176416795891e-06, "loss": 0.6335, "step": 5470 }, { "epoch": 0.3851460753255896, "grad_norm": 1.724002480506897, "learning_rate": 9.858719552455428e-06, "loss": 0.617, "step": 5471 }, { "epoch": 0.38521647307286166, "grad_norm": 1.8568593263626099, "learning_rate": 9.85726253948755e-06, "loss": 0.6157, "step": 5472 }, { "epoch": 0.38528687082013374, "grad_norm": 1.8755921125411987, "learning_rate": 9.855805377968004e-06, "loss": 0.681, "step": 5473 }, { "epoch": 0.3853572685674058, "grad_norm": 1.6114517450332642, "learning_rate": 9.85434806797255e-06, "loss": 0.6811, "step": 5474 }, { "epoch": 0.38542766631467795, "grad_norm": 2.003673553466797, "learning_rate": 9.852890609576957e-06, "loss": 0.817, "step": 5475 }, { "epoch": 0.38549806406195003, "grad_norm": 1.8812379837036133, "learning_rate": 9.851433002856992e-06, "loss": 0.7006, "step": 5476 }, { "epoch": 0.3855684618092221, "grad_norm": 2.1441457271575928, "learning_rate": 9.849975247888446e-06, "loss": 0.8069, "step": 5477 }, { "epoch": 0.3856388595564942, "grad_norm": 1.782468318939209, "learning_rate": 9.848517344747108e-06, "loss": 0.7295, "step": 5478 }, { "epoch": 0.38570925730376626, "grad_norm": 1.771806240081787, "learning_rate": 9.84705929350877e-06, "loss": 0.6455, "step": 5479 }, { "epoch": 0.38577965505103834, "grad_norm": 2.2588231563568115, "learning_rate": 9.845601094249241e-06, "loss": 0.7738, "step": 5480 }, { "epoch": 0.38585005279831047, "grad_norm": 1.7788875102996826, "learning_rate": 9.844142747044336e-06, "loss": 0.6965, "step": 5481 }, { "epoch": 0.38592045054558255, "grad_norm": 1.8683902025222778, "learning_rate": 9.842684251969872e-06, "loss": 0.7494, "step": 5482 }, { "epoch": 0.3859908482928546, "grad_norm": 1.7153949737548828, "learning_rate": 9.841225609101682e-06, "loss": 0.608, "step": 5483 }, { "epoch": 0.3860612460401267, "grad_norm": 1.6623295545578003, "learning_rate": 9.839766818515596e-06, "loss": 0.7013, "step": 5484 }, { "epoch": 0.3861316437873988, "grad_norm": 1.794837474822998, "learning_rate": 9.838307880287464e-06, "loss": 0.7167, "step": 5485 }, { "epoch": 0.3862020415346709, "grad_norm": 1.9074307680130005, "learning_rate": 9.836848794493133e-06, "loss": 0.8325, "step": 5486 }, { "epoch": 0.386272439281943, "grad_norm": 1.783678650856018, "learning_rate": 9.835389561208466e-06, "loss": 0.8131, "step": 5487 }, { "epoch": 0.38634283702921507, "grad_norm": 1.8474059104919434, "learning_rate": 9.833930180509324e-06, "loss": 0.6207, "step": 5488 }, { "epoch": 0.38641323477648715, "grad_norm": 1.6404483318328857, "learning_rate": 9.832470652471589e-06, "loss": 0.7315, "step": 5489 }, { "epoch": 0.3864836325237592, "grad_norm": 1.7419829368591309, "learning_rate": 9.831010977171139e-06, "loss": 0.7266, "step": 5490 }, { "epoch": 0.3865540302710313, "grad_norm": 1.8262670040130615, "learning_rate": 9.829551154683862e-06, "loss": 0.7078, "step": 5491 }, { "epoch": 0.38662442801830343, "grad_norm": 1.7427153587341309, "learning_rate": 9.828091185085661e-06, "loss": 0.7214, "step": 5492 }, { "epoch": 0.3866948257655755, "grad_norm": 2.2244904041290283, "learning_rate": 9.826631068452436e-06, "loss": 0.8052, "step": 5493 }, { "epoch": 0.3867652235128476, "grad_norm": 2.169015884399414, "learning_rate": 9.825170804860102e-06, "loss": 0.6946, "step": 5494 }, { "epoch": 0.38683562126011967, "grad_norm": 1.687139868736267, "learning_rate": 9.82371039438458e-06, "loss": 0.7275, "step": 5495 }, { "epoch": 0.38690601900739174, "grad_norm": 1.883980631828308, "learning_rate": 9.822249837101797e-06, "loss": 0.6597, "step": 5496 }, { "epoch": 0.3869764167546639, "grad_norm": 1.8745591640472412, "learning_rate": 9.82078913308769e-06, "loss": 0.8758, "step": 5497 }, { "epoch": 0.38704681450193595, "grad_norm": 1.921337604522705, "learning_rate": 9.8193282824182e-06, "loss": 0.8123, "step": 5498 }, { "epoch": 0.38711721224920803, "grad_norm": 1.8899238109588623, "learning_rate": 9.81786728516928e-06, "loss": 0.7704, "step": 5499 }, { "epoch": 0.3871876099964801, "grad_norm": 1.7792056798934937, "learning_rate": 9.816406141416887e-06, "loss": 0.6385, "step": 5500 }, { "epoch": 0.3872580077437522, "grad_norm": 1.7850033044815063, "learning_rate": 9.81494485123699e-06, "loss": 0.7413, "step": 5501 }, { "epoch": 0.38732840549102426, "grad_norm": 2.668912887573242, "learning_rate": 9.813483414705558e-06, "loss": 0.7166, "step": 5502 }, { "epoch": 0.3873988032382964, "grad_norm": 2.1849663257598877, "learning_rate": 9.812021831898575e-06, "loss": 0.7033, "step": 5503 }, { "epoch": 0.3874692009855685, "grad_norm": 1.7751823663711548, "learning_rate": 9.810560102892034e-06, "loss": 0.7403, "step": 5504 }, { "epoch": 0.38753959873284055, "grad_norm": 1.7217925786972046, "learning_rate": 9.809098227761925e-06, "loss": 0.776, "step": 5505 }, { "epoch": 0.38760999648011263, "grad_norm": 1.7495076656341553, "learning_rate": 9.807636206584254e-06, "loss": 0.7212, "step": 5506 }, { "epoch": 0.3876803942273847, "grad_norm": 2.0366933345794678, "learning_rate": 9.806174039435037e-06, "loss": 0.7803, "step": 5507 }, { "epoch": 0.3877507919746568, "grad_norm": 1.7918260097503662, "learning_rate": 9.804711726390288e-06, "loss": 0.7786, "step": 5508 }, { "epoch": 0.3878211897219289, "grad_norm": 1.8992435932159424, "learning_rate": 9.803249267526038e-06, "loss": 0.7378, "step": 5509 }, { "epoch": 0.387891587469201, "grad_norm": 2.0526819229125977, "learning_rate": 9.801786662918318e-06, "loss": 0.7413, "step": 5510 }, { "epoch": 0.38796198521647307, "grad_norm": 1.7563878297805786, "learning_rate": 9.800323912643172e-06, "loss": 0.7197, "step": 5511 }, { "epoch": 0.38803238296374515, "grad_norm": 1.7936469316482544, "learning_rate": 9.79886101677665e-06, "loss": 0.7415, "step": 5512 }, { "epoch": 0.3881027807110172, "grad_norm": 2.1601929664611816, "learning_rate": 9.797397975394811e-06, "loss": 0.6949, "step": 5513 }, { "epoch": 0.38817317845828936, "grad_norm": 1.9066649675369263, "learning_rate": 9.795934788573717e-06, "loss": 0.6963, "step": 5514 }, { "epoch": 0.38824357620556144, "grad_norm": 1.7406415939331055, "learning_rate": 9.79447145638944e-06, "loss": 0.611, "step": 5515 }, { "epoch": 0.3883139739528335, "grad_norm": 1.5542773008346558, "learning_rate": 9.79300797891806e-06, "loss": 0.6599, "step": 5516 }, { "epoch": 0.3883843717001056, "grad_norm": 1.883857250213623, "learning_rate": 9.791544356235667e-06, "loss": 0.8528, "step": 5517 }, { "epoch": 0.38845476944737767, "grad_norm": 1.9469116926193237, "learning_rate": 9.790080588418355e-06, "loss": 0.7981, "step": 5518 }, { "epoch": 0.38852516719464975, "grad_norm": 1.4750159978866577, "learning_rate": 9.788616675542227e-06, "loss": 0.8376, "step": 5519 }, { "epoch": 0.3885955649419219, "grad_norm": 2.733816385269165, "learning_rate": 9.787152617683392e-06, "loss": 0.7147, "step": 5520 }, { "epoch": 0.38866596268919396, "grad_norm": 2.1390366554260254, "learning_rate": 9.785688414917967e-06, "loss": 0.7528, "step": 5521 }, { "epoch": 0.38873636043646603, "grad_norm": 1.5453308820724487, "learning_rate": 9.78422406732208e-06, "loss": 0.8661, "step": 5522 }, { "epoch": 0.3888067581837381, "grad_norm": 1.8129229545593262, "learning_rate": 9.782759574971862e-06, "loss": 0.7151, "step": 5523 }, { "epoch": 0.3888771559310102, "grad_norm": 1.7061574459075928, "learning_rate": 9.781294937943453e-06, "loss": 0.7256, "step": 5524 }, { "epoch": 0.3889475536782823, "grad_norm": 1.8579649925231934, "learning_rate": 9.779830156313e-06, "loss": 0.6689, "step": 5525 }, { "epoch": 0.3890179514255544, "grad_norm": 2.1256520748138428, "learning_rate": 9.778365230156662e-06, "loss": 0.6553, "step": 5526 }, { "epoch": 0.3890883491728265, "grad_norm": 1.482027530670166, "learning_rate": 9.776900159550598e-06, "loss": 0.7098, "step": 5527 }, { "epoch": 0.38915874692009855, "grad_norm": 1.990539312362671, "learning_rate": 9.77543494457098e-06, "loss": 0.7583, "step": 5528 }, { "epoch": 0.38922914466737063, "grad_norm": 1.6218023300170898, "learning_rate": 9.773969585293988e-06, "loss": 0.5983, "step": 5529 }, { "epoch": 0.3892995424146427, "grad_norm": 1.649764060974121, "learning_rate": 9.772504081795801e-06, "loss": 0.6374, "step": 5530 }, { "epoch": 0.38936994016191484, "grad_norm": 1.7175627946853638, "learning_rate": 9.771038434152618e-06, "loss": 0.7487, "step": 5531 }, { "epoch": 0.3894403379091869, "grad_norm": 1.8458620309829712, "learning_rate": 9.769572642440637e-06, "loss": 0.6347, "step": 5532 }, { "epoch": 0.389510735656459, "grad_norm": 1.7708427906036377, "learning_rate": 9.768106706736065e-06, "loss": 0.8158, "step": 5533 }, { "epoch": 0.3895811334037311, "grad_norm": 1.5983657836914062, "learning_rate": 9.76664062711512e-06, "loss": 0.5886, "step": 5534 }, { "epoch": 0.38965153115100315, "grad_norm": 1.7792373895645142, "learning_rate": 9.765174403654022e-06, "loss": 0.6259, "step": 5535 }, { "epoch": 0.3897219288982753, "grad_norm": 1.7615834474563599, "learning_rate": 9.763708036429003e-06, "loss": 0.6613, "step": 5536 }, { "epoch": 0.38979232664554736, "grad_norm": 1.7922098636627197, "learning_rate": 9.762241525516301e-06, "loss": 0.7671, "step": 5537 }, { "epoch": 0.38986272439281944, "grad_norm": 1.7660354375839233, "learning_rate": 9.760774870992159e-06, "loss": 0.6645, "step": 5538 }, { "epoch": 0.3899331221400915, "grad_norm": 1.6601006984710693, "learning_rate": 9.759308072932832e-06, "loss": 0.7733, "step": 5539 }, { "epoch": 0.3900035198873636, "grad_norm": 1.7215995788574219, "learning_rate": 9.75784113141458e-06, "loss": 0.7263, "step": 5540 }, { "epoch": 0.39007391763463567, "grad_norm": 2.0790717601776123, "learning_rate": 9.756374046513668e-06, "loss": 0.7428, "step": 5541 }, { "epoch": 0.3901443153819078, "grad_norm": 1.9518482685089111, "learning_rate": 9.754906818306374e-06, "loss": 0.6418, "step": 5542 }, { "epoch": 0.3902147131291799, "grad_norm": 1.867910623550415, "learning_rate": 9.753439446868981e-06, "loss": 0.6915, "step": 5543 }, { "epoch": 0.39028511087645196, "grad_norm": 1.420827865600586, "learning_rate": 9.751971932277777e-06, "loss": 0.62, "step": 5544 }, { "epoch": 0.39035550862372403, "grad_norm": 1.7914464473724365, "learning_rate": 9.750504274609057e-06, "loss": 0.7767, "step": 5545 }, { "epoch": 0.3904259063709961, "grad_norm": 1.8050329685211182, "learning_rate": 9.749036473939131e-06, "loss": 0.748, "step": 5546 }, { "epoch": 0.3904963041182682, "grad_norm": 1.7995966672897339, "learning_rate": 9.747568530344308e-06, "loss": 0.6775, "step": 5547 }, { "epoch": 0.3905667018655403, "grad_norm": 1.7150462865829468, "learning_rate": 9.74610044390091e-06, "loss": 0.5979, "step": 5548 }, { "epoch": 0.3906370996128124, "grad_norm": 1.7092957496643066, "learning_rate": 9.744632214685263e-06, "loss": 0.6112, "step": 5549 }, { "epoch": 0.3907074973600845, "grad_norm": 1.7161226272583008, "learning_rate": 9.743163842773702e-06, "loss": 0.7685, "step": 5550 }, { "epoch": 0.39077789510735655, "grad_norm": 1.8535044193267822, "learning_rate": 9.741695328242565e-06, "loss": 0.6719, "step": 5551 }, { "epoch": 0.39084829285462863, "grad_norm": 1.989148497581482, "learning_rate": 9.74022667116821e-06, "loss": 0.6362, "step": 5552 }, { "epoch": 0.39091869060190076, "grad_norm": 1.5626888275146484, "learning_rate": 9.738757871626984e-06, "loss": 0.6967, "step": 5553 }, { "epoch": 0.39098908834917284, "grad_norm": 1.6351114511489868, "learning_rate": 9.73728892969526e-06, "loss": 0.7358, "step": 5554 }, { "epoch": 0.3910594860964449, "grad_norm": 1.9982280731201172, "learning_rate": 9.735819845449403e-06, "loss": 0.6404, "step": 5555 }, { "epoch": 0.391129883843717, "grad_norm": 1.8469774723052979, "learning_rate": 9.734350618965793e-06, "loss": 0.7676, "step": 5556 }, { "epoch": 0.3912002815909891, "grad_norm": 2.013108253479004, "learning_rate": 9.73288125032082e-06, "loss": 0.7444, "step": 5557 }, { "epoch": 0.39127067933826115, "grad_norm": 2.588761568069458, "learning_rate": 9.731411739590877e-06, "loss": 0.7033, "step": 5558 }, { "epoch": 0.3913410770855333, "grad_norm": 1.910742998123169, "learning_rate": 9.729942086852363e-06, "loss": 0.8642, "step": 5559 }, { "epoch": 0.39141147483280536, "grad_norm": 1.99662446975708, "learning_rate": 9.728472292181686e-06, "loss": 0.7718, "step": 5560 }, { "epoch": 0.39148187258007744, "grad_norm": 1.6677974462509155, "learning_rate": 9.727002355655266e-06, "loss": 0.6839, "step": 5561 }, { "epoch": 0.3915522703273495, "grad_norm": 2.004951000213623, "learning_rate": 9.725532277349522e-06, "loss": 0.7197, "step": 5562 }, { "epoch": 0.3916226680746216, "grad_norm": 2.211705207824707, "learning_rate": 9.724062057340885e-06, "loss": 0.8211, "step": 5563 }, { "epoch": 0.3916930658218937, "grad_norm": 1.7051342725753784, "learning_rate": 9.722591695705798e-06, "loss": 0.6778, "step": 5564 }, { "epoch": 0.3917634635691658, "grad_norm": 1.768874168395996, "learning_rate": 9.7211211925207e-06, "loss": 0.757, "step": 5565 }, { "epoch": 0.3918338613164379, "grad_norm": 1.9665753841400146, "learning_rate": 9.71965054786205e-06, "loss": 0.7929, "step": 5566 }, { "epoch": 0.39190425906370996, "grad_norm": 1.7178375720977783, "learning_rate": 9.718179761806304e-06, "loss": 0.7465, "step": 5567 }, { "epoch": 0.39197465681098204, "grad_norm": 1.9417741298675537, "learning_rate": 9.716708834429929e-06, "loss": 0.8173, "step": 5568 }, { "epoch": 0.3920450545582541, "grad_norm": 1.5716177225112915, "learning_rate": 9.715237765809402e-06, "loss": 0.5334, "step": 5569 }, { "epoch": 0.39211545230552625, "grad_norm": 1.8033146858215332, "learning_rate": 9.713766556021204e-06, "loss": 0.7228, "step": 5570 }, { "epoch": 0.3921858500527983, "grad_norm": 1.8960916996002197, "learning_rate": 9.712295205141827e-06, "loss": 0.7081, "step": 5571 }, { "epoch": 0.3922562478000704, "grad_norm": 1.9240418672561646, "learning_rate": 9.710823713247762e-06, "loss": 0.623, "step": 5572 }, { "epoch": 0.3923266455473425, "grad_norm": 1.6313966512680054, "learning_rate": 9.709352080415522e-06, "loss": 0.7339, "step": 5573 }, { "epoch": 0.39239704329461456, "grad_norm": 1.417414903640747, "learning_rate": 9.70788030672161e-06, "loss": 0.762, "step": 5574 }, { "epoch": 0.39246744104188663, "grad_norm": 1.618254542350769, "learning_rate": 9.706408392242548e-06, "loss": 0.6462, "step": 5575 }, { "epoch": 0.39253783878915877, "grad_norm": 1.755558967590332, "learning_rate": 9.704936337054865e-06, "loss": 0.7319, "step": 5576 }, { "epoch": 0.39260823653643084, "grad_norm": 1.869261622428894, "learning_rate": 9.703464141235091e-06, "loss": 0.7422, "step": 5577 }, { "epoch": 0.3926786342837029, "grad_norm": 1.6230076551437378, "learning_rate": 9.70199180485977e-06, "loss": 0.6059, "step": 5578 }, { "epoch": 0.392749032030975, "grad_norm": 1.996168613433838, "learning_rate": 9.700519328005445e-06, "loss": 0.8045, "step": 5579 }, { "epoch": 0.3928194297782471, "grad_norm": 1.3776401281356812, "learning_rate": 9.699046710748676e-06, "loss": 0.6437, "step": 5580 }, { "epoch": 0.3928898275255192, "grad_norm": 2.1745691299438477, "learning_rate": 9.697573953166023e-06, "loss": 0.7866, "step": 5581 }, { "epoch": 0.3929602252727913, "grad_norm": 1.9311431646347046, "learning_rate": 9.696101055334057e-06, "loss": 0.663, "step": 5582 }, { "epoch": 0.39303062302006336, "grad_norm": 1.8500245809555054, "learning_rate": 9.694628017329354e-06, "loss": 0.6654, "step": 5583 }, { "epoch": 0.39310102076733544, "grad_norm": 1.7813059091567993, "learning_rate": 9.693154839228502e-06, "loss": 0.6812, "step": 5584 }, { "epoch": 0.3931714185146075, "grad_norm": 2.9597959518432617, "learning_rate": 9.691681521108093e-06, "loss": 0.7343, "step": 5585 }, { "epoch": 0.3932418162618796, "grad_norm": 1.5973812341690063, "learning_rate": 9.690208063044723e-06, "loss": 0.6931, "step": 5586 }, { "epoch": 0.39331221400915173, "grad_norm": 1.89991295337677, "learning_rate": 9.688734465114996e-06, "loss": 0.6674, "step": 5587 }, { "epoch": 0.3933826117564238, "grad_norm": 1.658571481704712, "learning_rate": 9.687260727395534e-06, "loss": 0.7988, "step": 5588 }, { "epoch": 0.3934530095036959, "grad_norm": 1.7606230974197388, "learning_rate": 9.685786849962949e-06, "loss": 0.6838, "step": 5589 }, { "epoch": 0.39352340725096796, "grad_norm": 1.803332805633545, "learning_rate": 9.684312832893874e-06, "loss": 0.6911, "step": 5590 }, { "epoch": 0.39359380499824004, "grad_norm": 1.9671108722686768, "learning_rate": 9.682838676264946e-06, "loss": 0.777, "step": 5591 }, { "epoch": 0.39366420274551217, "grad_norm": 1.8974249362945557, "learning_rate": 9.681364380152805e-06, "loss": 0.5824, "step": 5592 }, { "epoch": 0.39373460049278425, "grad_norm": 1.9445838928222656, "learning_rate": 9.6798899446341e-06, "loss": 0.6929, "step": 5593 }, { "epoch": 0.3938049982400563, "grad_norm": 2.6364235877990723, "learning_rate": 9.67841536978549e-06, "loss": 0.8097, "step": 5594 }, { "epoch": 0.3938753959873284, "grad_norm": 1.664697289466858, "learning_rate": 9.676940655683639e-06, "loss": 0.7037, "step": 5595 }, { "epoch": 0.3939457937346005, "grad_norm": 1.778639554977417, "learning_rate": 9.675465802405219e-06, "loss": 0.7337, "step": 5596 }, { "epoch": 0.39401619148187256, "grad_norm": 1.7590762376785278, "learning_rate": 9.673990810026911e-06, "loss": 0.7568, "step": 5597 }, { "epoch": 0.3940865892291447, "grad_norm": 1.8086475133895874, "learning_rate": 9.672515678625396e-06, "loss": 0.7885, "step": 5598 }, { "epoch": 0.39415698697641677, "grad_norm": 1.7939380407333374, "learning_rate": 9.671040408277372e-06, "loss": 0.7134, "step": 5599 }, { "epoch": 0.39422738472368885, "grad_norm": 1.676728367805481, "learning_rate": 9.669564999059538e-06, "loss": 0.7754, "step": 5600 }, { "epoch": 0.3942977824709609, "grad_norm": 2.1590068340301514, "learning_rate": 9.668089451048602e-06, "loss": 0.755, "step": 5601 }, { "epoch": 0.394368180218233, "grad_norm": 2.7915642261505127, "learning_rate": 9.66661376432128e-06, "loss": 0.7517, "step": 5602 }, { "epoch": 0.3944385779655051, "grad_norm": 1.783827304840088, "learning_rate": 9.665137938954294e-06, "loss": 0.8171, "step": 5603 }, { "epoch": 0.3945089757127772, "grad_norm": 1.699106216430664, "learning_rate": 9.66366197502437e-06, "loss": 0.6758, "step": 5604 }, { "epoch": 0.3945793734600493, "grad_norm": 2.1104142665863037, "learning_rate": 9.662185872608248e-06, "loss": 0.7068, "step": 5605 }, { "epoch": 0.39464977120732136, "grad_norm": 1.766754388809204, "learning_rate": 9.660709631782673e-06, "loss": 0.6646, "step": 5606 }, { "epoch": 0.39472016895459344, "grad_norm": 1.743774175643921, "learning_rate": 9.659233252624393e-06, "loss": 0.7426, "step": 5607 }, { "epoch": 0.3947905667018655, "grad_norm": 1.7642467021942139, "learning_rate": 9.657756735210169e-06, "loss": 0.7035, "step": 5608 }, { "epoch": 0.39486096444913765, "grad_norm": 1.8940998315811157, "learning_rate": 9.656280079616767e-06, "loss": 0.6844, "step": 5609 }, { "epoch": 0.39493136219640973, "grad_norm": 1.757473349571228, "learning_rate": 9.654803285920956e-06, "loss": 0.637, "step": 5610 }, { "epoch": 0.3950017599436818, "grad_norm": 2.1561524868011475, "learning_rate": 9.653326354199516e-06, "loss": 0.7795, "step": 5611 }, { "epoch": 0.3950721576909539, "grad_norm": 1.9163944721221924, "learning_rate": 9.651849284529238e-06, "loss": 0.7347, "step": 5612 }, { "epoch": 0.39514255543822596, "grad_norm": 1.661069393157959, "learning_rate": 9.650372076986914e-06, "loss": 0.7712, "step": 5613 }, { "epoch": 0.39521295318549804, "grad_norm": 1.9511587619781494, "learning_rate": 9.648894731649345e-06, "loss": 0.7881, "step": 5614 }, { "epoch": 0.39528335093277017, "grad_norm": 1.7308624982833862, "learning_rate": 9.647417248593342e-06, "loss": 0.6616, "step": 5615 }, { "epoch": 0.39535374868004225, "grad_norm": 1.873694658279419, "learning_rate": 9.645939627895715e-06, "loss": 0.7186, "step": 5616 }, { "epoch": 0.3954241464273143, "grad_norm": 1.836734414100647, "learning_rate": 9.644461869633291e-06, "loss": 0.7112, "step": 5617 }, { "epoch": 0.3954945441745864, "grad_norm": 1.8547680377960205, "learning_rate": 9.642983973882901e-06, "loss": 0.7324, "step": 5618 }, { "epoch": 0.3955649419218585, "grad_norm": 1.637615442276001, "learning_rate": 9.64150594072138e-06, "loss": 0.671, "step": 5619 }, { "epoch": 0.3956353396691306, "grad_norm": 1.9111108779907227, "learning_rate": 9.640027770225573e-06, "loss": 0.7004, "step": 5620 }, { "epoch": 0.3957057374164027, "grad_norm": 1.8978748321533203, "learning_rate": 9.638549462472332e-06, "loss": 0.7475, "step": 5621 }, { "epoch": 0.39577613516367477, "grad_norm": 1.939159870147705, "learning_rate": 9.637071017538514e-06, "loss": 0.7212, "step": 5622 }, { "epoch": 0.39584653291094685, "grad_norm": 1.8471709489822388, "learning_rate": 9.635592435500985e-06, "loss": 0.6181, "step": 5623 }, { "epoch": 0.3959169306582189, "grad_norm": 1.8075332641601562, "learning_rate": 9.63411371643662e-06, "loss": 0.7879, "step": 5624 }, { "epoch": 0.395987328405491, "grad_norm": 1.7333685159683228, "learning_rate": 9.632634860422296e-06, "loss": 0.8012, "step": 5625 }, { "epoch": 0.39605772615276313, "grad_norm": 1.83855140209198, "learning_rate": 9.6311558675349e-06, "loss": 0.6778, "step": 5626 }, { "epoch": 0.3961281239000352, "grad_norm": 1.9079039096832275, "learning_rate": 9.629676737851332e-06, "loss": 0.6928, "step": 5627 }, { "epoch": 0.3961985216473073, "grad_norm": 1.97743558883667, "learning_rate": 9.628197471448485e-06, "loss": 0.643, "step": 5628 }, { "epoch": 0.39626891939457937, "grad_norm": 1.723567008972168, "learning_rate": 9.626718068403272e-06, "loss": 0.7343, "step": 5629 }, { "epoch": 0.39633931714185144, "grad_norm": 1.6108994483947754, "learning_rate": 9.625238528792607e-06, "loss": 0.8485, "step": 5630 }, { "epoch": 0.3964097148891235, "grad_norm": 1.8023483753204346, "learning_rate": 9.623758852693417e-06, "loss": 0.679, "step": 5631 }, { "epoch": 0.39648011263639565, "grad_norm": 1.8468098640441895, "learning_rate": 9.622279040182623e-06, "loss": 0.7465, "step": 5632 }, { "epoch": 0.39655051038366773, "grad_norm": 1.7993305921554565, "learning_rate": 9.62079909133717e-06, "loss": 0.7531, "step": 5633 }, { "epoch": 0.3966209081309398, "grad_norm": 2.2064268589019775, "learning_rate": 9.619319006233996e-06, "loss": 0.9047, "step": 5634 }, { "epoch": 0.3966913058782119, "grad_norm": 1.8723995685577393, "learning_rate": 9.617838784950056e-06, "loss": 0.7639, "step": 5635 }, { "epoch": 0.39676170362548396, "grad_norm": 2.1562721729278564, "learning_rate": 9.616358427562308e-06, "loss": 0.5998, "step": 5636 }, { "epoch": 0.3968321013727561, "grad_norm": 1.8937634229660034, "learning_rate": 9.614877934147716e-06, "loss": 0.7617, "step": 5637 }, { "epoch": 0.3969024991200282, "grad_norm": 1.7877836227416992, "learning_rate": 9.613397304783251e-06, "loss": 0.7237, "step": 5638 }, { "epoch": 0.39697289686730025, "grad_norm": 1.84523606300354, "learning_rate": 9.611916539545893e-06, "loss": 0.6094, "step": 5639 }, { "epoch": 0.39704329461457233, "grad_norm": 2.0550453662872314, "learning_rate": 9.61043563851263e-06, "loss": 0.7467, "step": 5640 }, { "epoch": 0.3971136923618444, "grad_norm": 1.8072302341461182, "learning_rate": 9.608954601760452e-06, "loss": 0.7831, "step": 5641 }, { "epoch": 0.3971840901091165, "grad_norm": 2.0989537239074707, "learning_rate": 9.607473429366365e-06, "loss": 0.6131, "step": 5642 }, { "epoch": 0.3972544878563886, "grad_norm": 1.7158708572387695, "learning_rate": 9.60599212140737e-06, "loss": 0.6073, "step": 5643 }, { "epoch": 0.3973248856036607, "grad_norm": 1.803992748260498, "learning_rate": 9.604510677960483e-06, "loss": 0.704, "step": 5644 }, { "epoch": 0.39739528335093277, "grad_norm": 1.9705408811569214, "learning_rate": 9.60302909910273e-06, "loss": 0.6783, "step": 5645 }, { "epoch": 0.39746568109820485, "grad_norm": 1.8181060552597046, "learning_rate": 9.601547384911134e-06, "loss": 0.6656, "step": 5646 }, { "epoch": 0.3975360788454769, "grad_norm": 1.753326177597046, "learning_rate": 9.600065535462736e-06, "loss": 0.655, "step": 5647 }, { "epoch": 0.39760647659274906, "grad_norm": 2.033822774887085, "learning_rate": 9.598583550834575e-06, "loss": 0.7701, "step": 5648 }, { "epoch": 0.39767687434002114, "grad_norm": 2.44938325881958, "learning_rate": 9.597101431103703e-06, "loss": 0.7676, "step": 5649 }, { "epoch": 0.3977472720872932, "grad_norm": 1.893572449684143, "learning_rate": 9.595619176347173e-06, "loss": 0.7325, "step": 5650 }, { "epoch": 0.3978176698345653, "grad_norm": 1.7821191549301147, "learning_rate": 9.594136786642053e-06, "loss": 0.7072, "step": 5651 }, { "epoch": 0.39788806758183737, "grad_norm": 1.7332006692886353, "learning_rate": 9.592654262065412e-06, "loss": 0.6879, "step": 5652 }, { "epoch": 0.39795846532910945, "grad_norm": 3.2600131034851074, "learning_rate": 9.591171602694326e-06, "loss": 0.6917, "step": 5653 }, { "epoch": 0.3980288630763816, "grad_norm": 2.051196336746216, "learning_rate": 9.589688808605884e-06, "loss": 0.7379, "step": 5654 }, { "epoch": 0.39809926082365366, "grad_norm": 2.25900936126709, "learning_rate": 9.588205879877174e-06, "loss": 0.6384, "step": 5655 }, { "epoch": 0.39816965857092573, "grad_norm": 1.8466739654541016, "learning_rate": 9.586722816585299e-06, "loss": 0.7932, "step": 5656 }, { "epoch": 0.3982400563181978, "grad_norm": 1.9396183490753174, "learning_rate": 9.585239618807361e-06, "loss": 0.7016, "step": 5657 }, { "epoch": 0.3983104540654699, "grad_norm": 1.9233624935150146, "learning_rate": 9.583756286620472e-06, "loss": 0.6394, "step": 5658 }, { "epoch": 0.39838085181274197, "grad_norm": 1.799169659614563, "learning_rate": 9.582272820101755e-06, "loss": 0.6576, "step": 5659 }, { "epoch": 0.3984512495600141, "grad_norm": 2.190847396850586, "learning_rate": 9.580789219328334e-06, "loss": 0.8254, "step": 5660 }, { "epoch": 0.3985216473072862, "grad_norm": 1.9190279245376587, "learning_rate": 9.579305484377346e-06, "loss": 0.7519, "step": 5661 }, { "epoch": 0.39859204505455825, "grad_norm": 1.8280019760131836, "learning_rate": 9.577821615325928e-06, "loss": 0.6898, "step": 5662 }, { "epoch": 0.39866244280183033, "grad_norm": 2.796816349029541, "learning_rate": 9.576337612251231e-06, "loss": 0.7013, "step": 5663 }, { "epoch": 0.3987328405491024, "grad_norm": 1.6733360290527344, "learning_rate": 9.574853475230409e-06, "loss": 0.6456, "step": 5664 }, { "epoch": 0.39880323829637454, "grad_norm": 2.0422823429107666, "learning_rate": 9.573369204340623e-06, "loss": 0.7284, "step": 5665 }, { "epoch": 0.3988736360436466, "grad_norm": 1.8659354448318481, "learning_rate": 9.57188479965904e-06, "loss": 0.7933, "step": 5666 }, { "epoch": 0.3989440337909187, "grad_norm": 1.6957063674926758, "learning_rate": 9.570400261262838e-06, "loss": 0.7279, "step": 5667 }, { "epoch": 0.3990144315381908, "grad_norm": 1.804283618927002, "learning_rate": 9.568915589229197e-06, "loss": 0.7859, "step": 5668 }, { "epoch": 0.39908482928546285, "grad_norm": 1.589419960975647, "learning_rate": 9.56743078363531e-06, "loss": 0.6942, "step": 5669 }, { "epoch": 0.3991552270327349, "grad_norm": 1.7047122716903687, "learning_rate": 9.565945844558368e-06, "loss": 0.7569, "step": 5670 }, { "epoch": 0.39922562478000706, "grad_norm": 1.6502279043197632, "learning_rate": 9.564460772075582e-06, "loss": 0.7128, "step": 5671 }, { "epoch": 0.39929602252727914, "grad_norm": 1.975404143333435, "learning_rate": 9.562975566264156e-06, "loss": 0.7106, "step": 5672 }, { "epoch": 0.3993664202745512, "grad_norm": 1.9239375591278076, "learning_rate": 9.56149022720131e-06, "loss": 0.5742, "step": 5673 }, { "epoch": 0.3994368180218233, "grad_norm": 1.9068118333816528, "learning_rate": 9.560004754964265e-06, "loss": 0.7419, "step": 5674 }, { "epoch": 0.39950721576909537, "grad_norm": 1.796889305114746, "learning_rate": 9.558519149630258e-06, "loss": 0.5982, "step": 5675 }, { "epoch": 0.3995776135163675, "grad_norm": 2.1433496475219727, "learning_rate": 9.55703341127652e-06, "loss": 0.5911, "step": 5676 }, { "epoch": 0.3996480112636396, "grad_norm": 1.7491233348846436, "learning_rate": 9.5555475399803e-06, "loss": 0.6094, "step": 5677 }, { "epoch": 0.39971840901091166, "grad_norm": 2.0059337615966797, "learning_rate": 9.554061535818848e-06, "loss": 0.8125, "step": 5678 }, { "epoch": 0.39978880675818373, "grad_norm": 1.8465811014175415, "learning_rate": 9.552575398869427e-06, "loss": 0.6843, "step": 5679 }, { "epoch": 0.3998592045054558, "grad_norm": 2.027442216873169, "learning_rate": 9.551089129209295e-06, "loss": 0.794, "step": 5680 }, { "epoch": 0.3999296022527279, "grad_norm": 1.8214315176010132, "learning_rate": 9.549602726915732e-06, "loss": 0.7609, "step": 5681 }, { "epoch": 0.4, "grad_norm": 1.9064480066299438, "learning_rate": 9.54811619206601e-06, "loss": 0.6892, "step": 5682 }, { "epoch": 0.4000703977472721, "grad_norm": 1.8409302234649658, "learning_rate": 9.546629524737421e-06, "loss": 0.7295, "step": 5683 }, { "epoch": 0.4001407954945442, "grad_norm": 2.028588056564331, "learning_rate": 9.545142725007261e-06, "loss": 0.7057, "step": 5684 }, { "epoch": 0.40021119324181625, "grad_norm": 1.7423746585845947, "learning_rate": 9.543655792952823e-06, "loss": 0.7489, "step": 5685 }, { "epoch": 0.40028159098908833, "grad_norm": 3.653327703475952, "learning_rate": 9.542168728651413e-06, "loss": 0.7592, "step": 5686 }, { "epoch": 0.4003519887363604, "grad_norm": 1.5378414392471313, "learning_rate": 9.540681532180355e-06, "loss": 0.8099, "step": 5687 }, { "epoch": 0.40042238648363254, "grad_norm": 1.7829344272613525, "learning_rate": 9.53919420361696e-06, "loss": 0.7664, "step": 5688 }, { "epoch": 0.4004927842309046, "grad_norm": 1.5502066612243652, "learning_rate": 9.537706743038558e-06, "loss": 0.7225, "step": 5689 }, { "epoch": 0.4005631819781767, "grad_norm": 1.6796952486038208, "learning_rate": 9.536219150522487e-06, "loss": 0.7088, "step": 5690 }, { "epoch": 0.4006335797254488, "grad_norm": 1.7462257146835327, "learning_rate": 9.534731426146083e-06, "loss": 0.7628, "step": 5691 }, { "epoch": 0.40070397747272085, "grad_norm": 1.6705410480499268, "learning_rate": 9.533243569986695e-06, "loss": 0.6729, "step": 5692 }, { "epoch": 0.400774375219993, "grad_norm": 1.6411266326904297, "learning_rate": 9.531755582121683e-06, "loss": 0.7266, "step": 5693 }, { "epoch": 0.40084477296726506, "grad_norm": 1.6925642490386963, "learning_rate": 9.530267462628402e-06, "loss": 0.671, "step": 5694 }, { "epoch": 0.40091517071453714, "grad_norm": 1.7912423610687256, "learning_rate": 9.528779211584226e-06, "loss": 0.7795, "step": 5695 }, { "epoch": 0.4009855684618092, "grad_norm": 2.158135414123535, "learning_rate": 9.52729082906653e-06, "loss": 0.6136, "step": 5696 }, { "epoch": 0.4010559662090813, "grad_norm": 1.7546265125274658, "learning_rate": 9.525802315152692e-06, "loss": 0.685, "step": 5697 }, { "epoch": 0.40112636395635337, "grad_norm": 1.826875925064087, "learning_rate": 9.524313669920106e-06, "loss": 0.6716, "step": 5698 }, { "epoch": 0.4011967617036255, "grad_norm": 1.7154937982559204, "learning_rate": 9.522824893446167e-06, "loss": 0.6949, "step": 5699 }, { "epoch": 0.4012671594508976, "grad_norm": 1.8083882331848145, "learning_rate": 9.521335985808276e-06, "loss": 0.7119, "step": 5700 }, { "epoch": 0.40133755719816966, "grad_norm": 1.6063218116760254, "learning_rate": 9.519846947083841e-06, "loss": 0.6595, "step": 5701 }, { "epoch": 0.40140795494544174, "grad_norm": 1.790459394454956, "learning_rate": 9.518357777350287e-06, "loss": 0.6524, "step": 5702 }, { "epoch": 0.4014783526927138, "grad_norm": 1.8051888942718506, "learning_rate": 9.516868476685026e-06, "loss": 0.7346, "step": 5703 }, { "epoch": 0.40154875043998595, "grad_norm": 1.4663714170455933, "learning_rate": 9.515379045165496e-06, "loss": 0.6583, "step": 5704 }, { "epoch": 0.401619148187258, "grad_norm": 1.753555178642273, "learning_rate": 9.513889482869131e-06, "loss": 0.7919, "step": 5705 }, { "epoch": 0.4016895459345301, "grad_norm": 1.479905366897583, "learning_rate": 9.512399789873375e-06, "loss": 0.6259, "step": 5706 }, { "epoch": 0.4017599436818022, "grad_norm": 1.8101551532745361, "learning_rate": 9.510909966255678e-06, "loss": 0.8163, "step": 5707 }, { "epoch": 0.40183034142907426, "grad_norm": 1.8088176250457764, "learning_rate": 9.509420012093498e-06, "loss": 0.6427, "step": 5708 }, { "epoch": 0.40190073917634633, "grad_norm": 1.7744650840759277, "learning_rate": 9.5079299274643e-06, "loss": 0.7849, "step": 5709 }, { "epoch": 0.40197113692361847, "grad_norm": 1.8404700756072998, "learning_rate": 9.506439712445552e-06, "loss": 0.7982, "step": 5710 }, { "epoch": 0.40204153467089054, "grad_norm": 1.8321253061294556, "learning_rate": 9.504949367114733e-06, "loss": 0.7551, "step": 5711 }, { "epoch": 0.4021119324181626, "grad_norm": 1.777778148651123, "learning_rate": 9.503458891549331e-06, "loss": 0.7794, "step": 5712 }, { "epoch": 0.4021823301654347, "grad_norm": 2.0994369983673096, "learning_rate": 9.501968285826832e-06, "loss": 0.7883, "step": 5713 }, { "epoch": 0.4022527279127068, "grad_norm": 1.8754111528396606, "learning_rate": 9.500477550024736e-06, "loss": 0.7647, "step": 5714 }, { "epoch": 0.40232312565997885, "grad_norm": 2.1719653606414795, "learning_rate": 9.498986684220547e-06, "loss": 0.7154, "step": 5715 }, { "epoch": 0.402393523407251, "grad_norm": 1.8492810726165771, "learning_rate": 9.497495688491778e-06, "loss": 0.8543, "step": 5716 }, { "epoch": 0.40246392115452306, "grad_norm": 1.6183831691741943, "learning_rate": 9.496004562915946e-06, "loss": 0.6946, "step": 5717 }, { "epoch": 0.40253431890179514, "grad_norm": 1.7026729583740234, "learning_rate": 9.494513307570576e-06, "loss": 0.7409, "step": 5718 }, { "epoch": 0.4026047166490672, "grad_norm": 1.785105586051941, "learning_rate": 9.4930219225332e-06, "loss": 0.622, "step": 5719 }, { "epoch": 0.4026751143963393, "grad_norm": 1.9942015409469604, "learning_rate": 9.491530407881356e-06, "loss": 0.6938, "step": 5720 }, { "epoch": 0.40274551214361143, "grad_norm": 2.1085386276245117, "learning_rate": 9.49003876369259e-06, "loss": 0.663, "step": 5721 }, { "epoch": 0.4028159098908835, "grad_norm": 1.71738862991333, "learning_rate": 9.488546990044453e-06, "loss": 0.6842, "step": 5722 }, { "epoch": 0.4028863076381556, "grad_norm": 1.723063588142395, "learning_rate": 9.487055087014507e-06, "loss": 0.6465, "step": 5723 }, { "epoch": 0.40295670538542766, "grad_norm": 2.5643258094787598, "learning_rate": 9.485563054680313e-06, "loss": 0.7492, "step": 5724 }, { "epoch": 0.40302710313269974, "grad_norm": 1.256446361541748, "learning_rate": 9.484070893119443e-06, "loss": 0.8049, "step": 5725 }, { "epoch": 0.4030975008799718, "grad_norm": 1.893876314163208, "learning_rate": 9.482578602409481e-06, "loss": 0.7268, "step": 5726 }, { "epoch": 0.40316789862724395, "grad_norm": 1.857404112815857, "learning_rate": 9.481086182628006e-06, "loss": 0.6741, "step": 5727 }, { "epoch": 0.403238296374516, "grad_norm": 1.5152446031570435, "learning_rate": 9.479593633852614e-06, "loss": 0.6962, "step": 5728 }, { "epoch": 0.4033086941217881, "grad_norm": 1.9274277687072754, "learning_rate": 9.478100956160905e-06, "loss": 0.6946, "step": 5729 }, { "epoch": 0.4033790918690602, "grad_norm": 1.7151548862457275, "learning_rate": 9.47660814963048e-06, "loss": 0.5361, "step": 5730 }, { "epoch": 0.40344948961633226, "grad_norm": 1.6509225368499756, "learning_rate": 9.475115214338956e-06, "loss": 0.6887, "step": 5731 }, { "epoch": 0.4035198873636044, "grad_norm": 1.5534749031066895, "learning_rate": 9.473622150363953e-06, "loss": 0.6806, "step": 5732 }, { "epoch": 0.40359028511087647, "grad_norm": 1.972931146621704, "learning_rate": 9.472128957783088e-06, "loss": 0.7188, "step": 5733 }, { "epoch": 0.40366068285814855, "grad_norm": 1.9601820707321167, "learning_rate": 9.470635636674003e-06, "loss": 0.7169, "step": 5734 }, { "epoch": 0.4037310806054206, "grad_norm": 1.702879548072815, "learning_rate": 9.469142187114335e-06, "loss": 0.657, "step": 5735 }, { "epoch": 0.4038014783526927, "grad_norm": 1.8446251153945923, "learning_rate": 9.467648609181727e-06, "loss": 0.6323, "step": 5736 }, { "epoch": 0.4038718760999648, "grad_norm": 2.65925931930542, "learning_rate": 9.466154902953834e-06, "loss": 0.7084, "step": 5737 }, { "epoch": 0.4039422738472369, "grad_norm": 1.7357723712921143, "learning_rate": 9.464661068508314e-06, "loss": 0.6357, "step": 5738 }, { "epoch": 0.404012671594509, "grad_norm": 2.0799858570098877, "learning_rate": 9.463167105922831e-06, "loss": 0.71, "step": 5739 }, { "epoch": 0.40408306934178106, "grad_norm": 1.755706548690796, "learning_rate": 9.461673015275059e-06, "loss": 0.5586, "step": 5740 }, { "epoch": 0.40415346708905314, "grad_norm": 1.5220930576324463, "learning_rate": 9.460178796642682e-06, "loss": 0.7264, "step": 5741 }, { "epoch": 0.4042238648363252, "grad_norm": 2.1348989009857178, "learning_rate": 9.458684450103379e-06, "loss": 0.7076, "step": 5742 }, { "epoch": 0.40429426258359735, "grad_norm": 2.168361186981201, "learning_rate": 9.457189975734843e-06, "loss": 0.7047, "step": 5743 }, { "epoch": 0.40436466033086943, "grad_norm": 1.817024827003479, "learning_rate": 9.455695373614777e-06, "loss": 0.6854, "step": 5744 }, { "epoch": 0.4044350580781415, "grad_norm": 1.7404192686080933, "learning_rate": 9.454200643820883e-06, "loss": 0.8694, "step": 5745 }, { "epoch": 0.4045054558254136, "grad_norm": 1.6466504335403442, "learning_rate": 9.452705786430876e-06, "loss": 0.7081, "step": 5746 }, { "epoch": 0.40457585357268566, "grad_norm": 1.7738351821899414, "learning_rate": 9.451210801522474e-06, "loss": 0.6835, "step": 5747 }, { "epoch": 0.40464625131995774, "grad_norm": 1.7137365341186523, "learning_rate": 9.4497156891734e-06, "loss": 0.7032, "step": 5748 }, { "epoch": 0.40471664906722987, "grad_norm": 1.798773169517517, "learning_rate": 9.448220449461391e-06, "loss": 0.7315, "step": 5749 }, { "epoch": 0.40478704681450195, "grad_norm": 1.7511727809906006, "learning_rate": 9.446725082464186e-06, "loss": 0.737, "step": 5750 }, { "epoch": 0.404857444561774, "grad_norm": 1.8785525560379028, "learning_rate": 9.445229588259523e-06, "loss": 0.7509, "step": 5751 }, { "epoch": 0.4049278423090461, "grad_norm": 1.9458194971084595, "learning_rate": 9.443733966925163e-06, "loss": 0.8083, "step": 5752 }, { "epoch": 0.4049982400563182, "grad_norm": 1.6961705684661865, "learning_rate": 9.442238218538859e-06, "loss": 0.836, "step": 5753 }, { "epoch": 0.40506863780359026, "grad_norm": 1.4049104452133179, "learning_rate": 9.44074234317838e-06, "loss": 0.6562, "step": 5754 }, { "epoch": 0.4051390355508624, "grad_norm": 1.5522716045379639, "learning_rate": 9.439246340921492e-06, "loss": 0.6151, "step": 5755 }, { "epoch": 0.40520943329813447, "grad_norm": 1.7786287069320679, "learning_rate": 9.437750211845982e-06, "loss": 0.6896, "step": 5756 }, { "epoch": 0.40527983104540655, "grad_norm": 1.825160264968872, "learning_rate": 9.436253956029628e-06, "loss": 0.721, "step": 5757 }, { "epoch": 0.4053502287926786, "grad_norm": 1.6551214456558228, "learning_rate": 9.434757573550226e-06, "loss": 0.5738, "step": 5758 }, { "epoch": 0.4054206265399507, "grad_norm": 1.7784318923950195, "learning_rate": 9.433261064485572e-06, "loss": 0.6464, "step": 5759 }, { "epoch": 0.40549102428722283, "grad_norm": 1.817336916923523, "learning_rate": 9.431764428913471e-06, "loss": 0.7163, "step": 5760 }, { "epoch": 0.4055614220344949, "grad_norm": 1.9848960638046265, "learning_rate": 9.430267666911736e-06, "loss": 0.7039, "step": 5761 }, { "epoch": 0.405631819781767, "grad_norm": 1.8978503942489624, "learning_rate": 9.428770778558184e-06, "loss": 0.7331, "step": 5762 }, { "epoch": 0.40570221752903907, "grad_norm": 1.6153795719146729, "learning_rate": 9.427273763930638e-06, "loss": 0.5785, "step": 5763 }, { "epoch": 0.40577261527631114, "grad_norm": 1.7465856075286865, "learning_rate": 9.425776623106933e-06, "loss": 0.7456, "step": 5764 }, { "epoch": 0.4058430130235832, "grad_norm": 1.5278129577636719, "learning_rate": 9.424279356164904e-06, "loss": 0.7507, "step": 5765 }, { "epoch": 0.40591341077085535, "grad_norm": 1.9049773216247559, "learning_rate": 9.422781963182395e-06, "loss": 0.6399, "step": 5766 }, { "epoch": 0.40598380851812743, "grad_norm": 1.6051594018936157, "learning_rate": 9.421284444237256e-06, "loss": 0.6991, "step": 5767 }, { "epoch": 0.4060542062653995, "grad_norm": 1.8460326194763184, "learning_rate": 9.41978679940735e-06, "loss": 0.7001, "step": 5768 }, { "epoch": 0.4061246040126716, "grad_norm": 1.7562357187271118, "learning_rate": 9.418289028770534e-06, "loss": 0.6051, "step": 5769 }, { "epoch": 0.40619500175994366, "grad_norm": 1.5830988883972168, "learning_rate": 9.41679113240468e-06, "loss": 0.6715, "step": 5770 }, { "epoch": 0.4062653995072158, "grad_norm": 1.5191670656204224, "learning_rate": 9.415293110387668e-06, "loss": 0.7869, "step": 5771 }, { "epoch": 0.4063357972544879, "grad_norm": 1.765479564666748, "learning_rate": 9.41379496279738e-06, "loss": 0.6773, "step": 5772 }, { "epoch": 0.40640619500175995, "grad_norm": 1.8467720746994019, "learning_rate": 9.412296689711707e-06, "loss": 0.7208, "step": 5773 }, { "epoch": 0.40647659274903203, "grad_norm": 1.7103976011276245, "learning_rate": 9.410798291208542e-06, "loss": 0.724, "step": 5774 }, { "epoch": 0.4065469904963041, "grad_norm": 1.7214332818984985, "learning_rate": 9.409299767365792e-06, "loss": 0.6948, "step": 5775 }, { "epoch": 0.4066173882435762, "grad_norm": 1.6131806373596191, "learning_rate": 9.407801118261364e-06, "loss": 0.7084, "step": 5776 }, { "epoch": 0.4066877859908483, "grad_norm": 1.8579357862472534, "learning_rate": 9.406302343973177e-06, "loss": 0.7497, "step": 5777 }, { "epoch": 0.4067581837381204, "grad_norm": 1.3415099382400513, "learning_rate": 9.404803444579149e-06, "loss": 0.6943, "step": 5778 }, { "epoch": 0.40682858148539247, "grad_norm": 1.6667970418930054, "learning_rate": 9.403304420157215e-06, "loss": 0.8075, "step": 5779 }, { "epoch": 0.40689897923266455, "grad_norm": 1.8639007806777954, "learning_rate": 9.401805270785307e-06, "loss": 0.7166, "step": 5780 }, { "epoch": 0.4069693769799366, "grad_norm": 1.8055942058563232, "learning_rate": 9.400305996541368e-06, "loss": 0.7206, "step": 5781 }, { "epoch": 0.4070397747272087, "grad_norm": 1.6711360216140747, "learning_rate": 9.398806597503343e-06, "loss": 0.8106, "step": 5782 }, { "epoch": 0.40711017247448084, "grad_norm": 1.5883315801620483, "learning_rate": 9.397307073749192e-06, "loss": 0.7777, "step": 5783 }, { "epoch": 0.4071805702217529, "grad_norm": 2.00970458984375, "learning_rate": 9.395807425356878e-06, "loss": 0.7815, "step": 5784 }, { "epoch": 0.407250967969025, "grad_norm": 1.8645482063293457, "learning_rate": 9.394307652404363e-06, "loss": 0.7711, "step": 5785 }, { "epoch": 0.40732136571629707, "grad_norm": 1.5793050527572632, "learning_rate": 9.392807754969627e-06, "loss": 0.6503, "step": 5786 }, { "epoch": 0.40739176346356915, "grad_norm": 1.684002161026001, "learning_rate": 9.391307733130647e-06, "loss": 0.6764, "step": 5787 }, { "epoch": 0.4074621612108413, "grad_norm": 2.0054900646209717, "learning_rate": 9.389807586965413e-06, "loss": 0.7274, "step": 5788 }, { "epoch": 0.40753255895811336, "grad_norm": 1.7945923805236816, "learning_rate": 9.388307316551918e-06, "loss": 0.6604, "step": 5789 }, { "epoch": 0.40760295670538543, "grad_norm": 2.067002296447754, "learning_rate": 9.386806921968163e-06, "loss": 0.7564, "step": 5790 }, { "epoch": 0.4076733544526575, "grad_norm": 1.6093369722366333, "learning_rate": 9.385306403292154e-06, "loss": 0.7889, "step": 5791 }, { "epoch": 0.4077437521999296, "grad_norm": 2.1637468338012695, "learning_rate": 9.383805760601907e-06, "loss": 0.7796, "step": 5792 }, { "epoch": 0.40781414994720167, "grad_norm": 1.799615502357483, "learning_rate": 9.382304993975436e-06, "loss": 0.7907, "step": 5793 }, { "epoch": 0.4078845476944738, "grad_norm": 1.8235608339309692, "learning_rate": 9.380804103490773e-06, "loss": 0.7245, "step": 5794 }, { "epoch": 0.4079549454417459, "grad_norm": 1.6648019552230835, "learning_rate": 9.37930308922595e-06, "loss": 0.7449, "step": 5795 }, { "epoch": 0.40802534318901795, "grad_norm": 2.0128161907196045, "learning_rate": 9.377801951259001e-06, "loss": 0.7422, "step": 5796 }, { "epoch": 0.40809574093629003, "grad_norm": 1.9637551307678223, "learning_rate": 9.376300689667977e-06, "loss": 0.6614, "step": 5797 }, { "epoch": 0.4081661386835621, "grad_norm": 1.8333698511123657, "learning_rate": 9.374799304530927e-06, "loss": 0.678, "step": 5798 }, { "epoch": 0.40823653643083424, "grad_norm": 1.5768358707427979, "learning_rate": 9.37329779592591e-06, "loss": 0.7203, "step": 5799 }, { "epoch": 0.4083069341781063, "grad_norm": 2.104137897491455, "learning_rate": 9.371796163930994e-06, "loss": 0.7481, "step": 5800 }, { "epoch": 0.4083773319253784, "grad_norm": 1.641037106513977, "learning_rate": 9.370294408624246e-06, "loss": 0.6776, "step": 5801 }, { "epoch": 0.4084477296726505, "grad_norm": 1.7680681943893433, "learning_rate": 9.368792530083744e-06, "loss": 0.7862, "step": 5802 }, { "epoch": 0.40851812741992255, "grad_norm": 1.776811957359314, "learning_rate": 9.367290528387574e-06, "loss": 0.8379, "step": 5803 }, { "epoch": 0.4085885251671946, "grad_norm": 1.9183528423309326, "learning_rate": 9.365788403613827e-06, "loss": 0.6661, "step": 5804 }, { "epoch": 0.40865892291446676, "grad_norm": 1.7065305709838867, "learning_rate": 9.364286155840596e-06, "loss": 0.5923, "step": 5805 }, { "epoch": 0.40872932066173884, "grad_norm": 1.817001461982727, "learning_rate": 9.362783785145987e-06, "loss": 0.696, "step": 5806 }, { "epoch": 0.4087997184090109, "grad_norm": 1.7162680625915527, "learning_rate": 9.36128129160811e-06, "loss": 0.7254, "step": 5807 }, { "epoch": 0.408870116156283, "grad_norm": 1.9084290266036987, "learning_rate": 9.359778675305079e-06, "loss": 0.7925, "step": 5808 }, { "epoch": 0.40894051390355507, "grad_norm": 1.9344899654388428, "learning_rate": 9.358275936315019e-06, "loss": 0.7371, "step": 5809 }, { "epoch": 0.40901091165082715, "grad_norm": 2.0015761852264404, "learning_rate": 9.356773074716056e-06, "loss": 0.84, "step": 5810 }, { "epoch": 0.4090813093980993, "grad_norm": 1.8801084756851196, "learning_rate": 9.355270090586328e-06, "loss": 0.6419, "step": 5811 }, { "epoch": 0.40915170714537136, "grad_norm": 1.6894186735153198, "learning_rate": 9.353766984003972e-06, "loss": 0.6606, "step": 5812 }, { "epoch": 0.40922210489264343, "grad_norm": 2.00762677192688, "learning_rate": 9.35226375504714e-06, "loss": 0.78, "step": 5813 }, { "epoch": 0.4092925026399155, "grad_norm": 1.692471981048584, "learning_rate": 9.350760403793987e-06, "loss": 0.5739, "step": 5814 }, { "epoch": 0.4093629003871876, "grad_norm": 1.7218680381774902, "learning_rate": 9.349256930322668e-06, "loss": 0.7374, "step": 5815 }, { "epoch": 0.4094332981344597, "grad_norm": 1.7502589225769043, "learning_rate": 9.347753334711358e-06, "loss": 0.7216, "step": 5816 }, { "epoch": 0.4095036958817318, "grad_norm": 1.8518223762512207, "learning_rate": 9.346249617038223e-06, "loss": 0.748, "step": 5817 }, { "epoch": 0.4095740936290039, "grad_norm": 1.749830961227417, "learning_rate": 9.344745777381445e-06, "loss": 0.5684, "step": 5818 }, { "epoch": 0.40964449137627595, "grad_norm": 1.6912190914154053, "learning_rate": 9.343241815819213e-06, "loss": 0.6579, "step": 5819 }, { "epoch": 0.40971488912354803, "grad_norm": 2.3810219764709473, "learning_rate": 9.341737732429717e-06, "loss": 0.697, "step": 5820 }, { "epoch": 0.4097852868708201, "grad_norm": 2.124039649963379, "learning_rate": 9.340233527291152e-06, "loss": 0.6091, "step": 5821 }, { "epoch": 0.40985568461809224, "grad_norm": 1.5721747875213623, "learning_rate": 9.33872920048173e-06, "loss": 0.7068, "step": 5822 }, { "epoch": 0.4099260823653643, "grad_norm": 1.7351032495498657, "learning_rate": 9.337224752079656e-06, "loss": 0.8329, "step": 5823 }, { "epoch": 0.4099964801126364, "grad_norm": 1.5845797061920166, "learning_rate": 9.335720182163152e-06, "loss": 0.7711, "step": 5824 }, { "epoch": 0.4100668778599085, "grad_norm": 1.6640275716781616, "learning_rate": 9.33421549081044e-06, "loss": 0.6541, "step": 5825 }, { "epoch": 0.41013727560718055, "grad_norm": 1.7895370721817017, "learning_rate": 9.332710678099751e-06, "loss": 0.7219, "step": 5826 }, { "epoch": 0.4102076733544527, "grad_norm": 1.7705739736557007, "learning_rate": 9.33120574410932e-06, "loss": 0.6369, "step": 5827 }, { "epoch": 0.41027807110172476, "grad_norm": 1.7590675354003906, "learning_rate": 9.329700688917393e-06, "loss": 0.6526, "step": 5828 }, { "epoch": 0.41034846884899684, "grad_norm": 2.0010604858398438, "learning_rate": 9.328195512602216e-06, "loss": 0.7459, "step": 5829 }, { "epoch": 0.4104188665962689, "grad_norm": 1.8349838256835938, "learning_rate": 9.326690215242045e-06, "loss": 0.7644, "step": 5830 }, { "epoch": 0.410489264343541, "grad_norm": 2.194354295730591, "learning_rate": 9.325184796915146e-06, "loss": 0.7285, "step": 5831 }, { "epoch": 0.41055966209081307, "grad_norm": 1.7261016368865967, "learning_rate": 9.32367925769978e-06, "loss": 0.781, "step": 5832 }, { "epoch": 0.4106300598380852, "grad_norm": 1.6133323907852173, "learning_rate": 9.322173597674225e-06, "loss": 0.6355, "step": 5833 }, { "epoch": 0.4107004575853573, "grad_norm": 1.7576709985733032, "learning_rate": 9.320667816916762e-06, "loss": 0.7573, "step": 5834 }, { "epoch": 0.41077085533262936, "grad_norm": 1.8653875589370728, "learning_rate": 9.319161915505678e-06, "loss": 0.89, "step": 5835 }, { "epoch": 0.41084125307990144, "grad_norm": 1.8187925815582275, "learning_rate": 9.317655893519267e-06, "loss": 0.8293, "step": 5836 }, { "epoch": 0.4109116508271735, "grad_norm": 1.909471869468689, "learning_rate": 9.316149751035825e-06, "loss": 0.6844, "step": 5837 }, { "epoch": 0.4109820485744456, "grad_norm": 1.5758250951766968, "learning_rate": 9.314643488133661e-06, "loss": 0.7509, "step": 5838 }, { "epoch": 0.4110524463217177, "grad_norm": 1.7679216861724854, "learning_rate": 9.313137104891085e-06, "loss": 0.7468, "step": 5839 }, { "epoch": 0.4111228440689898, "grad_norm": 1.701890468597412, "learning_rate": 9.311630601386418e-06, "loss": 0.6896, "step": 5840 }, { "epoch": 0.4111932418162619, "grad_norm": 1.9473927021026611, "learning_rate": 9.310123977697981e-06, "loss": 0.8378, "step": 5841 }, { "epoch": 0.41126363956353396, "grad_norm": 1.8602027893066406, "learning_rate": 9.308617233904105e-06, "loss": 0.6679, "step": 5842 }, { "epoch": 0.41133403731080603, "grad_norm": 1.9628676176071167, "learning_rate": 9.307110370083132e-06, "loss": 0.6429, "step": 5843 }, { "epoch": 0.41140443505807817, "grad_norm": 1.5905227661132812, "learning_rate": 9.305603386313398e-06, "loss": 0.6552, "step": 5844 }, { "epoch": 0.41147483280535024, "grad_norm": 2.368514060974121, "learning_rate": 9.304096282673257e-06, "loss": 0.7594, "step": 5845 }, { "epoch": 0.4115452305526223, "grad_norm": 1.785260558128357, "learning_rate": 9.302589059241062e-06, "loss": 0.7107, "step": 5846 }, { "epoch": 0.4116156282998944, "grad_norm": 1.6443688869476318, "learning_rate": 9.301081716095178e-06, "loss": 0.803, "step": 5847 }, { "epoch": 0.4116860260471665, "grad_norm": 1.7285079956054688, "learning_rate": 9.299574253313972e-06, "loss": 0.6149, "step": 5848 }, { "epoch": 0.41175642379443855, "grad_norm": 1.6724612712860107, "learning_rate": 9.298066670975818e-06, "loss": 0.721, "step": 5849 }, { "epoch": 0.4118268215417107, "grad_norm": 1.829293131828308, "learning_rate": 9.296558969159096e-06, "loss": 0.6668, "step": 5850 }, { "epoch": 0.41189721928898276, "grad_norm": 1.6967962980270386, "learning_rate": 9.295051147942191e-06, "loss": 0.7854, "step": 5851 }, { "epoch": 0.41196761703625484, "grad_norm": 2.2284326553344727, "learning_rate": 9.293543207403503e-06, "loss": 0.7792, "step": 5852 }, { "epoch": 0.4120380147835269, "grad_norm": 1.6074142456054688, "learning_rate": 9.292035147621423e-06, "loss": 0.7999, "step": 5853 }, { "epoch": 0.412108412530799, "grad_norm": 1.6968129873275757, "learning_rate": 9.29052696867436e-06, "loss": 0.7158, "step": 5854 }, { "epoch": 0.41217881027807113, "grad_norm": 1.7896620035171509, "learning_rate": 9.289018670640728e-06, "loss": 0.7796, "step": 5855 }, { "epoch": 0.4122492080253432, "grad_norm": 1.837199091911316, "learning_rate": 9.28751025359894e-06, "loss": 0.7851, "step": 5856 }, { "epoch": 0.4123196057726153, "grad_norm": 1.5770806074142456, "learning_rate": 9.286001717627421e-06, "loss": 0.6915, "step": 5857 }, { "epoch": 0.41239000351988736, "grad_norm": 1.8756579160690308, "learning_rate": 9.284493062804606e-06, "loss": 0.7347, "step": 5858 }, { "epoch": 0.41246040126715944, "grad_norm": 1.7037806510925293, "learning_rate": 9.282984289208926e-06, "loss": 0.6999, "step": 5859 }, { "epoch": 0.4125307990144315, "grad_norm": 1.6347579956054688, "learning_rate": 9.281475396918823e-06, "loss": 0.7065, "step": 5860 }, { "epoch": 0.41260119676170365, "grad_norm": 1.9985543489456177, "learning_rate": 9.279966386012751e-06, "loss": 0.6959, "step": 5861 }, { "epoch": 0.4126715945089757, "grad_norm": 2.01326847076416, "learning_rate": 9.278457256569161e-06, "loss": 0.6603, "step": 5862 }, { "epoch": 0.4127419922562478, "grad_norm": 1.4518811702728271, "learning_rate": 9.276948008666515e-06, "loss": 0.6112, "step": 5863 }, { "epoch": 0.4128123900035199, "grad_norm": 1.518276333808899, "learning_rate": 9.27543864238328e-06, "loss": 0.6956, "step": 5864 }, { "epoch": 0.41288278775079196, "grad_norm": 1.9940561056137085, "learning_rate": 9.273929157797927e-06, "loss": 0.6012, "step": 5865 }, { "epoch": 0.41295318549806403, "grad_norm": 2.0651774406433105, "learning_rate": 9.27241955498894e-06, "loss": 0.6584, "step": 5866 }, { "epoch": 0.41302358324533617, "grad_norm": 1.6378350257873535, "learning_rate": 9.2709098340348e-06, "loss": 0.6374, "step": 5867 }, { "epoch": 0.41309398099260825, "grad_norm": 1.7147190570831299, "learning_rate": 9.269399995014005e-06, "loss": 0.7188, "step": 5868 }, { "epoch": 0.4131643787398803, "grad_norm": 2.0653791427612305, "learning_rate": 9.267890038005046e-06, "loss": 0.7582, "step": 5869 }, { "epoch": 0.4132347764871524, "grad_norm": 1.9030970335006714, "learning_rate": 9.26637996308643e-06, "loss": 0.6886, "step": 5870 }, { "epoch": 0.4133051742344245, "grad_norm": 2.076634168624878, "learning_rate": 9.264869770336667e-06, "loss": 0.7335, "step": 5871 }, { "epoch": 0.4133755719816966, "grad_norm": 2.021449089050293, "learning_rate": 9.263359459834275e-06, "loss": 0.741, "step": 5872 }, { "epoch": 0.4134459697289687, "grad_norm": 1.9878677129745483, "learning_rate": 9.261849031657774e-06, "loss": 0.8162, "step": 5873 }, { "epoch": 0.41351636747624076, "grad_norm": 1.7260687351226807, "learning_rate": 9.260338485885696e-06, "loss": 0.6415, "step": 5874 }, { "epoch": 0.41358676522351284, "grad_norm": 2.24116849899292, "learning_rate": 9.25882782259657e-06, "loss": 0.7929, "step": 5875 }, { "epoch": 0.4136571629707849, "grad_norm": 1.5809831619262695, "learning_rate": 9.257317041868939e-06, "loss": 0.6217, "step": 5876 }, { "epoch": 0.413727560718057, "grad_norm": 1.8305349349975586, "learning_rate": 9.255806143781353e-06, "loss": 0.689, "step": 5877 }, { "epoch": 0.41379795846532913, "grad_norm": 1.717634916305542, "learning_rate": 9.254295128412362e-06, "loss": 0.7234, "step": 5878 }, { "epoch": 0.4138683562126012, "grad_norm": 2.18241548538208, "learning_rate": 9.252783995840523e-06, "loss": 0.6636, "step": 5879 }, { "epoch": 0.4139387539598733, "grad_norm": 1.6264292001724243, "learning_rate": 9.251272746144407e-06, "loss": 0.7907, "step": 5880 }, { "epoch": 0.41400915170714536, "grad_norm": 1.794415831565857, "learning_rate": 9.249761379402582e-06, "loss": 0.7438, "step": 5881 }, { "epoch": 0.41407954945441744, "grad_norm": 1.747355580329895, "learning_rate": 9.248249895693624e-06, "loss": 0.7072, "step": 5882 }, { "epoch": 0.41414994720168957, "grad_norm": 1.6854356527328491, "learning_rate": 9.246738295096116e-06, "loss": 0.6312, "step": 5883 }, { "epoch": 0.41422034494896165, "grad_norm": 1.9729335308074951, "learning_rate": 9.245226577688652e-06, "loss": 0.7487, "step": 5884 }, { "epoch": 0.4142907426962337, "grad_norm": 2.06073260307312, "learning_rate": 9.243714743549825e-06, "loss": 0.7835, "step": 5885 }, { "epoch": 0.4143611404435058, "grad_norm": 1.9175682067871094, "learning_rate": 9.242202792758235e-06, "loss": 0.7067, "step": 5886 }, { "epoch": 0.4144315381907779, "grad_norm": 2.0126290321350098, "learning_rate": 9.240690725392493e-06, "loss": 0.7501, "step": 5887 }, { "epoch": 0.41450193593804996, "grad_norm": 1.7136175632476807, "learning_rate": 9.23917854153121e-06, "loss": 0.7689, "step": 5888 }, { "epoch": 0.4145723336853221, "grad_norm": 1.761779546737671, "learning_rate": 9.237666241253005e-06, "loss": 0.6554, "step": 5889 }, { "epoch": 0.41464273143259417, "grad_norm": 2.0988869667053223, "learning_rate": 9.236153824636508e-06, "loss": 0.6645, "step": 5890 }, { "epoch": 0.41471312917986625, "grad_norm": 1.7515437602996826, "learning_rate": 9.23464129176035e-06, "loss": 0.6615, "step": 5891 }, { "epoch": 0.4147835269271383, "grad_norm": 1.7992063760757446, "learning_rate": 9.233128642703164e-06, "loss": 0.6882, "step": 5892 }, { "epoch": 0.4148539246744104, "grad_norm": 1.6499199867248535, "learning_rate": 9.231615877543597e-06, "loss": 0.7349, "step": 5893 }, { "epoch": 0.4149243224216825, "grad_norm": 1.6978577375411987, "learning_rate": 9.230102996360301e-06, "loss": 0.7738, "step": 5894 }, { "epoch": 0.4149947201689546, "grad_norm": 1.6807516813278198, "learning_rate": 9.22858999923193e-06, "loss": 0.7187, "step": 5895 }, { "epoch": 0.4150651179162267, "grad_norm": 1.8575568199157715, "learning_rate": 9.227076886237147e-06, "loss": 0.7546, "step": 5896 }, { "epoch": 0.41513551566349877, "grad_norm": 1.6852152347564697, "learning_rate": 9.225563657454621e-06, "loss": 0.5608, "step": 5897 }, { "epoch": 0.41520591341077084, "grad_norm": 1.6787410974502563, "learning_rate": 9.224050312963023e-06, "loss": 0.6424, "step": 5898 }, { "epoch": 0.4152763111580429, "grad_norm": 2.0938761234283447, "learning_rate": 9.222536852841036e-06, "loss": 0.7235, "step": 5899 }, { "epoch": 0.41534670890531505, "grad_norm": 1.657214879989624, "learning_rate": 9.221023277167346e-06, "loss": 0.7116, "step": 5900 }, { "epoch": 0.41541710665258713, "grad_norm": 2.1542892456054688, "learning_rate": 9.219509586020642e-06, "loss": 0.8124, "step": 5901 }, { "epoch": 0.4154875043998592, "grad_norm": 1.6519473791122437, "learning_rate": 9.217995779479624e-06, "loss": 0.6792, "step": 5902 }, { "epoch": 0.4155579021471313, "grad_norm": 1.7338699102401733, "learning_rate": 9.216481857623e-06, "loss": 0.8351, "step": 5903 }, { "epoch": 0.41562829989440336, "grad_norm": 1.5467700958251953, "learning_rate": 9.214967820529477e-06, "loss": 0.5371, "step": 5904 }, { "epoch": 0.41569869764167544, "grad_norm": 1.8631147146224976, "learning_rate": 9.213453668277768e-06, "loss": 0.7494, "step": 5905 }, { "epoch": 0.4157690953889476, "grad_norm": 1.7144228219985962, "learning_rate": 9.211939400946599e-06, "loss": 0.7349, "step": 5906 }, { "epoch": 0.41583949313621965, "grad_norm": 1.6954641342163086, "learning_rate": 9.210425018614697e-06, "loss": 0.6924, "step": 5907 }, { "epoch": 0.41590989088349173, "grad_norm": 1.9504185914993286, "learning_rate": 9.208910521360798e-06, "loss": 0.7737, "step": 5908 }, { "epoch": 0.4159802886307638, "grad_norm": 1.6038826704025269, "learning_rate": 9.20739590926364e-06, "loss": 0.8121, "step": 5909 }, { "epoch": 0.4160506863780359, "grad_norm": 3.168085813522339, "learning_rate": 9.205881182401968e-06, "loss": 0.7033, "step": 5910 }, { "epoch": 0.416121084125308, "grad_norm": 1.8201075792312622, "learning_rate": 9.204366340854537e-06, "loss": 0.6996, "step": 5911 }, { "epoch": 0.4161914818725801, "grad_norm": 1.6835488080978394, "learning_rate": 9.202851384700105e-06, "loss": 0.5643, "step": 5912 }, { "epoch": 0.41626187961985217, "grad_norm": 1.5713342428207397, "learning_rate": 9.20133631401743e-06, "loss": 0.6248, "step": 5913 }, { "epoch": 0.41633227736712425, "grad_norm": 1.5994939804077148, "learning_rate": 9.199821128885291e-06, "loss": 0.7044, "step": 5914 }, { "epoch": 0.4164026751143963, "grad_norm": 1.5554718971252441, "learning_rate": 9.198305829382456e-06, "loss": 0.6726, "step": 5915 }, { "epoch": 0.4164730728616684, "grad_norm": 2.0141658782958984, "learning_rate": 9.196790415587712e-06, "loss": 0.7387, "step": 5916 }, { "epoch": 0.41654347060894054, "grad_norm": 1.692787766456604, "learning_rate": 9.195274887579844e-06, "loss": 0.7238, "step": 5917 }, { "epoch": 0.4166138683562126, "grad_norm": 2.2205770015716553, "learning_rate": 9.193759245437649e-06, "loss": 0.6621, "step": 5918 }, { "epoch": 0.4166842661034847, "grad_norm": 1.5265004634857178, "learning_rate": 9.19224348923992e-06, "loss": 0.5953, "step": 5919 }, { "epoch": 0.41675466385075677, "grad_norm": 1.6776925325393677, "learning_rate": 9.190727619065467e-06, "loss": 0.695, "step": 5920 }, { "epoch": 0.41682506159802885, "grad_norm": 1.5745277404785156, "learning_rate": 9.189211634993104e-06, "loss": 0.7163, "step": 5921 }, { "epoch": 0.4168954593453009, "grad_norm": 2.0779683589935303, "learning_rate": 9.187695537101643e-06, "loss": 0.7455, "step": 5922 }, { "epoch": 0.41696585709257306, "grad_norm": 1.7018486261367798, "learning_rate": 9.18617932546991e-06, "loss": 0.686, "step": 5923 }, { "epoch": 0.41703625483984513, "grad_norm": 1.8314191102981567, "learning_rate": 9.184663000176736e-06, "loss": 0.6973, "step": 5924 }, { "epoch": 0.4171066525871172, "grad_norm": 1.7758718729019165, "learning_rate": 9.183146561300953e-06, "loss": 0.6588, "step": 5925 }, { "epoch": 0.4171770503343893, "grad_norm": 1.9741671085357666, "learning_rate": 9.1816300089214e-06, "loss": 0.6741, "step": 5926 }, { "epoch": 0.41724744808166137, "grad_norm": 1.8304142951965332, "learning_rate": 9.180113343116932e-06, "loss": 0.706, "step": 5927 }, { "epoch": 0.4173178458289335, "grad_norm": 2.0238025188446045, "learning_rate": 9.178596563966393e-06, "loss": 0.7455, "step": 5928 }, { "epoch": 0.4173882435762056, "grad_norm": 1.7662196159362793, "learning_rate": 9.177079671548646e-06, "loss": 0.699, "step": 5929 }, { "epoch": 0.41745864132347765, "grad_norm": 1.6563290357589722, "learning_rate": 9.175562665942558e-06, "loss": 0.7385, "step": 5930 }, { "epoch": 0.41752903907074973, "grad_norm": 1.796033263206482, "learning_rate": 9.174045547226995e-06, "loss": 0.6337, "step": 5931 }, { "epoch": 0.4175994368180218, "grad_norm": 1.83510160446167, "learning_rate": 9.172528315480834e-06, "loss": 0.7263, "step": 5932 }, { "epoch": 0.4176698345652939, "grad_norm": 1.7271041870117188, "learning_rate": 9.17101097078296e-06, "loss": 0.6868, "step": 5933 }, { "epoch": 0.417740232312566, "grad_norm": 1.6123604774475098, "learning_rate": 9.169493513212257e-06, "loss": 0.737, "step": 5934 }, { "epoch": 0.4178106300598381, "grad_norm": 1.5335601568222046, "learning_rate": 9.167975942847623e-06, "loss": 0.6025, "step": 5935 }, { "epoch": 0.4178810278071102, "grad_norm": 1.5864094495773315, "learning_rate": 9.166458259767957e-06, "loss": 0.7115, "step": 5936 }, { "epoch": 0.41795142555438225, "grad_norm": 1.354303240776062, "learning_rate": 9.164940464052164e-06, "loss": 0.59, "step": 5937 }, { "epoch": 0.4180218233016543, "grad_norm": 1.6473647356033325, "learning_rate": 9.163422555779153e-06, "loss": 0.7264, "step": 5938 }, { "epoch": 0.41809222104892646, "grad_norm": 1.788986086845398, "learning_rate": 9.161904535027848e-06, "loss": 0.8376, "step": 5939 }, { "epoch": 0.41816261879619854, "grad_norm": 8.183385848999023, "learning_rate": 9.160386401877165e-06, "loss": 0.5682, "step": 5940 }, { "epoch": 0.4182330165434706, "grad_norm": 2.1290781497955322, "learning_rate": 9.158868156406039e-06, "loss": 0.7029, "step": 5941 }, { "epoch": 0.4183034142907427, "grad_norm": 2.003115177154541, "learning_rate": 9.157349798693402e-06, "loss": 0.6828, "step": 5942 }, { "epoch": 0.41837381203801477, "grad_norm": 1.631263017654419, "learning_rate": 9.155831328818193e-06, "loss": 0.6404, "step": 5943 }, { "epoch": 0.41844420978528685, "grad_norm": 1.727770447731018, "learning_rate": 9.154312746859362e-06, "loss": 0.687, "step": 5944 }, { "epoch": 0.418514607532559, "grad_norm": 1.6331084966659546, "learning_rate": 9.152794052895861e-06, "loss": 0.6774, "step": 5945 }, { "epoch": 0.41858500527983106, "grad_norm": 1.8184255361557007, "learning_rate": 9.151275247006647e-06, "loss": 0.6851, "step": 5946 }, { "epoch": 0.41865540302710313, "grad_norm": 1.7146469354629517, "learning_rate": 9.149756329270683e-06, "loss": 0.682, "step": 5947 }, { "epoch": 0.4187258007743752, "grad_norm": 1.820462703704834, "learning_rate": 9.148237299766943e-06, "loss": 0.7062, "step": 5948 }, { "epoch": 0.4187961985216473, "grad_norm": 1.920345425605774, "learning_rate": 9.1467181585744e-06, "loss": 0.6719, "step": 5949 }, { "epoch": 0.4188665962689194, "grad_norm": 1.9192575216293335, "learning_rate": 9.145198905772034e-06, "loss": 0.7031, "step": 5950 }, { "epoch": 0.4189369940161915, "grad_norm": 1.585629940032959, "learning_rate": 9.143679541438836e-06, "loss": 0.7008, "step": 5951 }, { "epoch": 0.4190073917634636, "grad_norm": 1.893792748451233, "learning_rate": 9.142160065653796e-06, "loss": 0.6447, "step": 5952 }, { "epoch": 0.41907778951073565, "grad_norm": 1.7490506172180176, "learning_rate": 9.140640478495913e-06, "loss": 0.6969, "step": 5953 }, { "epoch": 0.41914818725800773, "grad_norm": 1.970916509628296, "learning_rate": 9.139120780044196e-06, "loss": 0.8116, "step": 5954 }, { "epoch": 0.4192185850052798, "grad_norm": 1.7748239040374756, "learning_rate": 9.13760097037765e-06, "loss": 0.5927, "step": 5955 }, { "epoch": 0.41928898275255194, "grad_norm": 1.6638107299804688, "learning_rate": 9.136081049575294e-06, "loss": 0.7526, "step": 5956 }, { "epoch": 0.419359380499824, "grad_norm": 2.207810878753662, "learning_rate": 9.13456101771615e-06, "loss": 0.8264, "step": 5957 }, { "epoch": 0.4194297782470961, "grad_norm": 1.6681687831878662, "learning_rate": 9.133040874879245e-06, "loss": 0.7401, "step": 5958 }, { "epoch": 0.4195001759943682, "grad_norm": 1.7101582288742065, "learning_rate": 9.131520621143614e-06, "loss": 0.7035, "step": 5959 }, { "epoch": 0.41957057374164025, "grad_norm": 2.0611424446105957, "learning_rate": 9.130000256588295e-06, "loss": 0.6641, "step": 5960 }, { "epoch": 0.41964097148891233, "grad_norm": 1.9964933395385742, "learning_rate": 9.128479781292333e-06, "loss": 0.7333, "step": 5961 }, { "epoch": 0.41971136923618446, "grad_norm": 2.1196279525756836, "learning_rate": 9.12695919533478e-06, "loss": 0.7799, "step": 5962 }, { "epoch": 0.41978176698345654, "grad_norm": 1.9974027872085571, "learning_rate": 9.125438498794694e-06, "loss": 0.7039, "step": 5963 }, { "epoch": 0.4198521647307286, "grad_norm": 2.0838675498962402, "learning_rate": 9.123917691751131e-06, "loss": 0.7389, "step": 5964 }, { "epoch": 0.4199225624780007, "grad_norm": 1.790669560432434, "learning_rate": 9.122396774283168e-06, "loss": 0.7777, "step": 5965 }, { "epoch": 0.41999296022527277, "grad_norm": 2.3371360301971436, "learning_rate": 9.120875746469873e-06, "loss": 0.6864, "step": 5966 }, { "epoch": 0.4200633579725449, "grad_norm": 1.6986582279205322, "learning_rate": 9.119354608390327e-06, "loss": 0.6618, "step": 5967 }, { "epoch": 0.420133755719817, "grad_norm": 1.8604509830474854, "learning_rate": 9.117833360123614e-06, "loss": 0.6859, "step": 5968 }, { "epoch": 0.42020415346708906, "grad_norm": 1.643764853477478, "learning_rate": 9.11631200174883e-06, "loss": 0.774, "step": 5969 }, { "epoch": 0.42027455121436114, "grad_norm": 1.7667086124420166, "learning_rate": 9.114790533345067e-06, "loss": 0.7111, "step": 5970 }, { "epoch": 0.4203449489616332, "grad_norm": 1.5120468139648438, "learning_rate": 9.113268954991427e-06, "loss": 0.6617, "step": 5971 }, { "epoch": 0.4204153467089053, "grad_norm": 1.6778124570846558, "learning_rate": 9.111747266767024e-06, "loss": 0.6336, "step": 5972 }, { "epoch": 0.4204857444561774, "grad_norm": 1.7377848625183105, "learning_rate": 9.110225468750963e-06, "loss": 0.6751, "step": 5973 }, { "epoch": 0.4205561422034495, "grad_norm": 1.8581351041793823, "learning_rate": 9.108703561022372e-06, "loss": 0.6995, "step": 5974 }, { "epoch": 0.4206265399507216, "grad_norm": 1.9778764247894287, "learning_rate": 9.107181543660373e-06, "loss": 0.7295, "step": 5975 }, { "epoch": 0.42069693769799366, "grad_norm": 1.6007062196731567, "learning_rate": 9.105659416744099e-06, "loss": 0.7611, "step": 5976 }, { "epoch": 0.42076733544526573, "grad_norm": 1.6757190227508545, "learning_rate": 9.10413718035268e-06, "loss": 0.7581, "step": 5977 }, { "epoch": 0.42083773319253787, "grad_norm": 1.6099828481674194, "learning_rate": 9.102614834565268e-06, "loss": 0.6772, "step": 5978 }, { "epoch": 0.42090813093980994, "grad_norm": 2.1886744499206543, "learning_rate": 9.101092379461004e-06, "loss": 0.7257, "step": 5979 }, { "epoch": 0.420978528687082, "grad_norm": 1.910719871520996, "learning_rate": 9.099569815119045e-06, "loss": 0.6957, "step": 5980 }, { "epoch": 0.4210489264343541, "grad_norm": 1.7987372875213623, "learning_rate": 9.09804714161855e-06, "loss": 0.6628, "step": 5981 }, { "epoch": 0.4211193241816262, "grad_norm": 1.5422509908676147, "learning_rate": 9.096524359038685e-06, "loss": 0.7436, "step": 5982 }, { "epoch": 0.42118972192889825, "grad_norm": 2.2252793312072754, "learning_rate": 9.095001467458616e-06, "loss": 0.7099, "step": 5983 }, { "epoch": 0.4212601196761704, "grad_norm": 1.8518147468566895, "learning_rate": 9.093478466957526e-06, "loss": 0.7263, "step": 5984 }, { "epoch": 0.42133051742344246, "grad_norm": 1.963258981704712, "learning_rate": 9.091955357614594e-06, "loss": 0.645, "step": 5985 }, { "epoch": 0.42140091517071454, "grad_norm": 1.5187724828720093, "learning_rate": 9.09043213950901e-06, "loss": 0.7178, "step": 5986 }, { "epoch": 0.4214713129179866, "grad_norm": 1.5758106708526611, "learning_rate": 9.088908812719965e-06, "loss": 0.5517, "step": 5987 }, { "epoch": 0.4215417106652587, "grad_norm": 1.803167700767517, "learning_rate": 9.087385377326658e-06, "loss": 0.6678, "step": 5988 }, { "epoch": 0.4216121084125308, "grad_norm": 1.631888747215271, "learning_rate": 9.085861833408296e-06, "loss": 0.7368, "step": 5989 }, { "epoch": 0.4216825061598029, "grad_norm": 1.5951452255249023, "learning_rate": 9.084338181044088e-06, "loss": 0.6377, "step": 5990 }, { "epoch": 0.421752903907075, "grad_norm": 1.9890165328979492, "learning_rate": 9.08281442031325e-06, "loss": 0.8212, "step": 5991 }, { "epoch": 0.42182330165434706, "grad_norm": 1.9781975746154785, "learning_rate": 9.081290551295002e-06, "loss": 0.7134, "step": 5992 }, { "epoch": 0.42189369940161914, "grad_norm": 1.4421428442001343, "learning_rate": 9.079766574068577e-06, "loss": 0.5738, "step": 5993 }, { "epoch": 0.4219640971488912, "grad_norm": 2.0059127807617188, "learning_rate": 9.078242488713203e-06, "loss": 0.7876, "step": 5994 }, { "epoch": 0.42203449489616335, "grad_norm": 1.593414068222046, "learning_rate": 9.076718295308117e-06, "loss": 0.7179, "step": 5995 }, { "epoch": 0.4221048926434354, "grad_norm": 1.8936508893966675, "learning_rate": 9.07519399393257e-06, "loss": 0.7661, "step": 5996 }, { "epoch": 0.4221752903907075, "grad_norm": 1.9734700918197632, "learning_rate": 9.073669584665806e-06, "loss": 0.6657, "step": 5997 }, { "epoch": 0.4222456881379796, "grad_norm": 2.2361061573028564, "learning_rate": 9.072145067587082e-06, "loss": 0.6472, "step": 5998 }, { "epoch": 0.42231608588525166, "grad_norm": 1.6794118881225586, "learning_rate": 9.070620442775662e-06, "loss": 0.6223, "step": 5999 }, { "epoch": 0.42238648363252373, "grad_norm": 1.6167969703674316, "learning_rate": 9.069095710310807e-06, "loss": 0.6483, "step": 6000 }, { "epoch": 0.42245688137979587, "grad_norm": 1.6593986749649048, "learning_rate": 9.067570870271791e-06, "loss": 0.6857, "step": 6001 }, { "epoch": 0.42252727912706795, "grad_norm": 1.853012204170227, "learning_rate": 9.066045922737897e-06, "loss": 0.6013, "step": 6002 }, { "epoch": 0.42259767687434, "grad_norm": 1.7216752767562866, "learning_rate": 9.064520867788401e-06, "loss": 0.5474, "step": 6003 }, { "epoch": 0.4226680746216121, "grad_norm": 1.8159053325653076, "learning_rate": 9.062995705502596e-06, "loss": 0.7246, "step": 6004 }, { "epoch": 0.4227384723688842, "grad_norm": 1.7246172428131104, "learning_rate": 9.061470435959778e-06, "loss": 0.6814, "step": 6005 }, { "epoch": 0.4228088701161563, "grad_norm": 1.6900124549865723, "learning_rate": 9.059945059239244e-06, "loss": 0.6438, "step": 6006 }, { "epoch": 0.4228792678634284, "grad_norm": 1.9455031156539917, "learning_rate": 9.0584195754203e-06, "loss": 0.688, "step": 6007 }, { "epoch": 0.42294966561070046, "grad_norm": 1.7316709756851196, "learning_rate": 9.056893984582259e-06, "loss": 0.6912, "step": 6008 }, { "epoch": 0.42302006335797254, "grad_norm": 2.0152199268341064, "learning_rate": 9.055368286804435e-06, "loss": 0.6848, "step": 6009 }, { "epoch": 0.4230904611052446, "grad_norm": 1.828118085861206, "learning_rate": 9.053842482166152e-06, "loss": 0.7648, "step": 6010 }, { "epoch": 0.4231608588525167, "grad_norm": 1.843651533126831, "learning_rate": 9.052316570746743e-06, "loss": 0.6277, "step": 6011 }, { "epoch": 0.42323125659978883, "grad_norm": 2.0660784244537354, "learning_rate": 9.050790552625532e-06, "loss": 0.7138, "step": 6012 }, { "epoch": 0.4233016543470609, "grad_norm": 2.002849578857422, "learning_rate": 9.049264427881864e-06, "loss": 0.6952, "step": 6013 }, { "epoch": 0.423372052094333, "grad_norm": 1.8742600679397583, "learning_rate": 9.047738196595085e-06, "loss": 0.6525, "step": 6014 }, { "epoch": 0.42344244984160506, "grad_norm": 2.289966583251953, "learning_rate": 9.04621185884454e-06, "loss": 0.7379, "step": 6015 }, { "epoch": 0.42351284758887714, "grad_norm": 1.8117685317993164, "learning_rate": 9.044685414709586e-06, "loss": 0.7749, "step": 6016 }, { "epoch": 0.4235832453361492, "grad_norm": 1.528077483177185, "learning_rate": 9.043158864269589e-06, "loss": 0.648, "step": 6017 }, { "epoch": 0.42365364308342135, "grad_norm": 1.755890965461731, "learning_rate": 9.041632207603912e-06, "loss": 0.6647, "step": 6018 }, { "epoch": 0.4237240408306934, "grad_norm": 1.6660250425338745, "learning_rate": 9.040105444791924e-06, "loss": 0.5442, "step": 6019 }, { "epoch": 0.4237944385779655, "grad_norm": 1.637634038925171, "learning_rate": 9.03857857591301e-06, "loss": 0.7207, "step": 6020 }, { "epoch": 0.4238648363252376, "grad_norm": 1.8926656246185303, "learning_rate": 9.037051601046547e-06, "loss": 0.7386, "step": 6021 }, { "epoch": 0.42393523407250966, "grad_norm": 1.8775016069412231, "learning_rate": 9.035524520271927e-06, "loss": 0.8455, "step": 6022 }, { "epoch": 0.4240056318197818, "grad_norm": 1.7788927555084229, "learning_rate": 9.033997333668547e-06, "loss": 0.6964, "step": 6023 }, { "epoch": 0.42407602956705387, "grad_norm": 1.7364180088043213, "learning_rate": 9.032470041315799e-06, "loss": 0.7063, "step": 6024 }, { "epoch": 0.42414642731432595, "grad_norm": 1.78489089012146, "learning_rate": 9.030942643293095e-06, "loss": 0.754, "step": 6025 }, { "epoch": 0.424216825061598, "grad_norm": 2.33210825920105, "learning_rate": 9.029415139679844e-06, "loss": 0.7703, "step": 6026 }, { "epoch": 0.4242872228088701, "grad_norm": 1.5387938022613525, "learning_rate": 9.027887530555461e-06, "loss": 0.6126, "step": 6027 }, { "epoch": 0.4243576205561422, "grad_norm": 2.0938901901245117, "learning_rate": 9.02635981599937e-06, "loss": 0.5297, "step": 6028 }, { "epoch": 0.4244280183034143, "grad_norm": 2.002307653427124, "learning_rate": 9.024831996090995e-06, "loss": 0.6816, "step": 6029 }, { "epoch": 0.4244984160506864, "grad_norm": 1.6631735563278198, "learning_rate": 9.023304070909771e-06, "loss": 0.6354, "step": 6030 }, { "epoch": 0.42456881379795847, "grad_norm": 2.0020647048950195, "learning_rate": 9.021776040535136e-06, "loss": 0.5907, "step": 6031 }, { "epoch": 0.42463921154523054, "grad_norm": 1.9584906101226807, "learning_rate": 9.020247905046534e-06, "loss": 0.8727, "step": 6032 }, { "epoch": 0.4247096092925026, "grad_norm": 1.7690964937210083, "learning_rate": 9.018719664523415e-06, "loss": 0.7022, "step": 6033 }, { "epoch": 0.42478000703977475, "grad_norm": 2.1385812759399414, "learning_rate": 9.017191319045228e-06, "loss": 0.7544, "step": 6034 }, { "epoch": 0.42485040478704683, "grad_norm": 2.0051212310791016, "learning_rate": 9.015662868691441e-06, "loss": 0.7255, "step": 6035 }, { "epoch": 0.4249208025343189, "grad_norm": 1.9760196208953857, "learning_rate": 9.014134313541518e-06, "loss": 0.8218, "step": 6036 }, { "epoch": 0.424991200281591, "grad_norm": 1.684480905532837, "learning_rate": 9.012605653674923e-06, "loss": 0.6711, "step": 6037 }, { "epoch": 0.42506159802886306, "grad_norm": 1.5824445486068726, "learning_rate": 9.011076889171143e-06, "loss": 0.7765, "step": 6038 }, { "epoch": 0.42513199577613514, "grad_norm": 1.9525823593139648, "learning_rate": 9.00954802010965e-06, "loss": 0.7298, "step": 6039 }, { "epoch": 0.4252023935234073, "grad_norm": 1.7466341257095337, "learning_rate": 9.008019046569935e-06, "loss": 0.7049, "step": 6040 }, { "epoch": 0.42527279127067935, "grad_norm": 1.9534591436386108, "learning_rate": 9.006489968631495e-06, "loss": 0.7324, "step": 6041 }, { "epoch": 0.42534318901795143, "grad_norm": 1.677843451499939, "learning_rate": 9.004960786373823e-06, "loss": 0.6224, "step": 6042 }, { "epoch": 0.4254135867652235, "grad_norm": 1.5617378950119019, "learning_rate": 9.003431499876422e-06, "loss": 0.8049, "step": 6043 }, { "epoch": 0.4254839845124956, "grad_norm": 1.9448145627975464, "learning_rate": 9.001902109218805e-06, "loss": 0.7274, "step": 6044 }, { "epoch": 0.42555438225976766, "grad_norm": 1.7132102251052856, "learning_rate": 9.000372614480483e-06, "loss": 0.7631, "step": 6045 }, { "epoch": 0.4256247800070398, "grad_norm": 1.660667896270752, "learning_rate": 8.998843015740976e-06, "loss": 0.6277, "step": 6046 }, { "epoch": 0.42569517775431187, "grad_norm": 2.126674175262451, "learning_rate": 8.997313313079813e-06, "loss": 0.7845, "step": 6047 }, { "epoch": 0.42576557550158395, "grad_norm": 1.7029719352722168, "learning_rate": 8.99578350657652e-06, "loss": 0.7349, "step": 6048 }, { "epoch": 0.425835973248856, "grad_norm": 1.6776679754257202, "learning_rate": 8.994253596310635e-06, "loss": 0.7303, "step": 6049 }, { "epoch": 0.4259063709961281, "grad_norm": 1.9067351818084717, "learning_rate": 8.992723582361701e-06, "loss": 0.7061, "step": 6050 }, { "epoch": 0.42597676874340024, "grad_norm": 1.9534130096435547, "learning_rate": 8.991193464809262e-06, "loss": 0.7062, "step": 6051 }, { "epoch": 0.4260471664906723, "grad_norm": 1.7583552598953247, "learning_rate": 8.989663243732872e-06, "loss": 0.6504, "step": 6052 }, { "epoch": 0.4261175642379444, "grad_norm": 1.7378369569778442, "learning_rate": 8.98813291921209e-06, "loss": 0.6146, "step": 6053 }, { "epoch": 0.42618796198521647, "grad_norm": 1.667134165763855, "learning_rate": 8.986602491326474e-06, "loss": 0.6623, "step": 6054 }, { "epoch": 0.42625835973248855, "grad_norm": 1.8637518882751465, "learning_rate": 8.985071960155595e-06, "loss": 0.744, "step": 6055 }, { "epoch": 0.4263287574797606, "grad_norm": 2.2613425254821777, "learning_rate": 8.98354132577903e-06, "loss": 0.7062, "step": 6056 }, { "epoch": 0.42639915522703276, "grad_norm": 1.6338573694229126, "learning_rate": 8.982010588276353e-06, "loss": 0.7003, "step": 6057 }, { "epoch": 0.42646955297430483, "grad_norm": 1.5336662530899048, "learning_rate": 8.98047974772715e-06, "loss": 0.8405, "step": 6058 }, { "epoch": 0.4265399507215769, "grad_norm": 2.803811550140381, "learning_rate": 8.978948804211013e-06, "loss": 0.6873, "step": 6059 }, { "epoch": 0.426610348468849, "grad_norm": 1.6783676147460938, "learning_rate": 8.977417757807535e-06, "loss": 0.7437, "step": 6060 }, { "epoch": 0.42668074621612107, "grad_norm": 1.7064518928527832, "learning_rate": 8.975886608596315e-06, "loss": 0.786, "step": 6061 }, { "epoch": 0.4267511439633932, "grad_norm": 2.017835855484009, "learning_rate": 8.974355356656963e-06, "loss": 0.7841, "step": 6062 }, { "epoch": 0.4268215417106653, "grad_norm": 2.0246951580047607, "learning_rate": 8.972824002069087e-06, "loss": 0.6172, "step": 6063 }, { "epoch": 0.42689193945793735, "grad_norm": 1.7762298583984375, "learning_rate": 8.971292544912304e-06, "loss": 0.6771, "step": 6064 }, { "epoch": 0.42696233720520943, "grad_norm": 1.7895864248275757, "learning_rate": 8.969760985266236e-06, "loss": 0.6944, "step": 6065 }, { "epoch": 0.4270327349524815, "grad_norm": 1.807686686515808, "learning_rate": 8.968229323210511e-06, "loss": 0.7149, "step": 6066 }, { "epoch": 0.4271031326997536, "grad_norm": 1.6709282398223877, "learning_rate": 8.966697558824763e-06, "loss": 0.7322, "step": 6067 }, { "epoch": 0.4271735304470257, "grad_norm": 1.7924108505249023, "learning_rate": 8.965165692188626e-06, "loss": 0.7308, "step": 6068 }, { "epoch": 0.4272439281942978, "grad_norm": 2.2598326206207275, "learning_rate": 8.963633723381745e-06, "loss": 0.6767, "step": 6069 }, { "epoch": 0.4273143259415699, "grad_norm": 2.0827717781066895, "learning_rate": 8.962101652483767e-06, "loss": 0.6026, "step": 6070 }, { "epoch": 0.42738472368884195, "grad_norm": 1.7441837787628174, "learning_rate": 8.96056947957435e-06, "loss": 0.7273, "step": 6071 }, { "epoch": 0.427455121436114, "grad_norm": 1.9211640357971191, "learning_rate": 8.959037204733147e-06, "loss": 0.6615, "step": 6072 }, { "epoch": 0.4275255191833861, "grad_norm": 2.1747751235961914, "learning_rate": 8.957504828039827e-06, "loss": 0.609, "step": 6073 }, { "epoch": 0.42759591693065824, "grad_norm": 2.652926445007324, "learning_rate": 8.955972349574059e-06, "loss": 0.6951, "step": 6074 }, { "epoch": 0.4276663146779303, "grad_norm": 1.934799313545227, "learning_rate": 8.954439769415517e-06, "loss": 0.6838, "step": 6075 }, { "epoch": 0.4277367124252024, "grad_norm": 1.7399855852127075, "learning_rate": 8.952907087643879e-06, "loss": 0.7663, "step": 6076 }, { "epoch": 0.42780711017247447, "grad_norm": 1.8127607107162476, "learning_rate": 8.951374304338836e-06, "loss": 0.7071, "step": 6077 }, { "epoch": 0.42787750791974655, "grad_norm": 1.7860937118530273, "learning_rate": 8.949841419580073e-06, "loss": 0.658, "step": 6078 }, { "epoch": 0.4279479056670187, "grad_norm": 1.3782943487167358, "learning_rate": 8.948308433447288e-06, "loss": 0.7731, "step": 6079 }, { "epoch": 0.42801830341429076, "grad_norm": 1.731669306755066, "learning_rate": 8.946775346020188e-06, "loss": 0.6907, "step": 6080 }, { "epoch": 0.42808870116156283, "grad_norm": 1.8618292808532715, "learning_rate": 8.94524215737847e-06, "loss": 0.7501, "step": 6081 }, { "epoch": 0.4281590989088349, "grad_norm": 1.6177434921264648, "learning_rate": 8.943708867601852e-06, "loss": 0.6578, "step": 6082 }, { "epoch": 0.428229496656107, "grad_norm": 1.8015034198760986, "learning_rate": 8.94217547677005e-06, "loss": 0.6268, "step": 6083 }, { "epoch": 0.42829989440337907, "grad_norm": 1.6952227354049683, "learning_rate": 8.940641984962787e-06, "loss": 0.6325, "step": 6084 }, { "epoch": 0.4283702921506512, "grad_norm": 1.875742793083191, "learning_rate": 8.939108392259786e-06, "loss": 0.78, "step": 6085 }, { "epoch": 0.4284406898979233, "grad_norm": 1.8028175830841064, "learning_rate": 8.937574698740788e-06, "loss": 0.7735, "step": 6086 }, { "epoch": 0.42851108764519535, "grad_norm": 1.8013789653778076, "learning_rate": 8.936040904485523e-06, "loss": 0.666, "step": 6087 }, { "epoch": 0.42858148539246743, "grad_norm": 1.7918307781219482, "learning_rate": 8.93450700957374e-06, "loss": 0.7198, "step": 6088 }, { "epoch": 0.4286518831397395, "grad_norm": 1.8849347829818726, "learning_rate": 8.932973014085184e-06, "loss": 0.6578, "step": 6089 }, { "epoch": 0.42872228088701164, "grad_norm": 2.1687722206115723, "learning_rate": 8.931438918099612e-06, "loss": 0.7192, "step": 6090 }, { "epoch": 0.4287926786342837, "grad_norm": 1.6919245719909668, "learning_rate": 8.92990472169678e-06, "loss": 0.6916, "step": 6091 }, { "epoch": 0.4288630763815558, "grad_norm": 1.9113175868988037, "learning_rate": 8.928370424956457e-06, "loss": 0.7497, "step": 6092 }, { "epoch": 0.4289334741288279, "grad_norm": 1.6480077505111694, "learning_rate": 8.926836027958405e-06, "loss": 0.6811, "step": 6093 }, { "epoch": 0.42900387187609995, "grad_norm": 3.886080503463745, "learning_rate": 8.925301530782405e-06, "loss": 0.6795, "step": 6094 }, { "epoch": 0.42907426962337203, "grad_norm": 1.8421233892440796, "learning_rate": 8.923766933508236e-06, "loss": 0.6618, "step": 6095 }, { "epoch": 0.42914466737064416, "grad_norm": 1.9346898794174194, "learning_rate": 8.92223223621568e-06, "loss": 0.6136, "step": 6096 }, { "epoch": 0.42921506511791624, "grad_norm": 1.6411617994308472, "learning_rate": 8.920697438984528e-06, "loss": 0.7398, "step": 6097 }, { "epoch": 0.4292854628651883, "grad_norm": 3.048496961593628, "learning_rate": 8.919162541894578e-06, "loss": 0.6536, "step": 6098 }, { "epoch": 0.4293558606124604, "grad_norm": 2.1733858585357666, "learning_rate": 8.91762754502563e-06, "loss": 0.7066, "step": 6099 }, { "epoch": 0.42942625835973247, "grad_norm": 1.6760640144348145, "learning_rate": 8.91609244845749e-06, "loss": 0.6581, "step": 6100 }, { "epoch": 0.42949665610700455, "grad_norm": 1.5646225214004517, "learning_rate": 8.914557252269968e-06, "loss": 0.6716, "step": 6101 }, { "epoch": 0.4295670538542767, "grad_norm": 1.6082866191864014, "learning_rate": 8.913021956542877e-06, "loss": 0.7023, "step": 6102 }, { "epoch": 0.42963745160154876, "grad_norm": 1.844744324684143, "learning_rate": 8.911486561356045e-06, "loss": 0.7483, "step": 6103 }, { "epoch": 0.42970784934882084, "grad_norm": 2.1872754096984863, "learning_rate": 8.909951066789297e-06, "loss": 0.6975, "step": 6104 }, { "epoch": 0.4297782470960929, "grad_norm": 1.6072494983673096, "learning_rate": 8.90841547292246e-06, "loss": 0.6945, "step": 6105 }, { "epoch": 0.429848644843365, "grad_norm": 1.8777034282684326, "learning_rate": 8.906879779835376e-06, "loss": 0.6827, "step": 6106 }, { "epoch": 0.4299190425906371, "grad_norm": 1.6227378845214844, "learning_rate": 8.905343987607886e-06, "loss": 0.6682, "step": 6107 }, { "epoch": 0.4299894403379092, "grad_norm": 1.720977783203125, "learning_rate": 8.903808096319834e-06, "loss": 0.6584, "step": 6108 }, { "epoch": 0.4300598380851813, "grad_norm": 1.7590538263320923, "learning_rate": 8.902272106051076e-06, "loss": 0.5555, "step": 6109 }, { "epoch": 0.43013023583245336, "grad_norm": 2.12790846824646, "learning_rate": 8.90073601688147e-06, "loss": 0.7482, "step": 6110 }, { "epoch": 0.43020063357972543, "grad_norm": 1.776079773902893, "learning_rate": 8.899199828890874e-06, "loss": 0.7735, "step": 6111 }, { "epoch": 0.4302710313269975, "grad_norm": 2.7456791400909424, "learning_rate": 8.897663542159159e-06, "loss": 0.7765, "step": 6112 }, { "epoch": 0.43034142907426964, "grad_norm": 1.519700527191162, "learning_rate": 8.8961271567662e-06, "loss": 0.6656, "step": 6113 }, { "epoch": 0.4304118268215417, "grad_norm": 1.6653283834457397, "learning_rate": 8.894590672791874e-06, "loss": 0.6442, "step": 6114 }, { "epoch": 0.4304822245688138, "grad_norm": 3.162923812866211, "learning_rate": 8.89305409031606e-06, "loss": 0.7192, "step": 6115 }, { "epoch": 0.4305526223160859, "grad_norm": 1.9119908809661865, "learning_rate": 8.891517409418651e-06, "loss": 0.6934, "step": 6116 }, { "epoch": 0.43062302006335795, "grad_norm": 1.6352273225784302, "learning_rate": 8.88998063017954e-06, "loss": 0.6326, "step": 6117 }, { "epoch": 0.4306934178106301, "grad_norm": 1.728672742843628, "learning_rate": 8.888443752678622e-06, "loss": 0.7021, "step": 6118 }, { "epoch": 0.43076381555790216, "grad_norm": 1.7484896183013916, "learning_rate": 8.886906776995805e-06, "loss": 0.652, "step": 6119 }, { "epoch": 0.43083421330517424, "grad_norm": 1.8095473051071167, "learning_rate": 8.885369703210995e-06, "loss": 0.7416, "step": 6120 }, { "epoch": 0.4309046110524463, "grad_norm": 1.9170221090316772, "learning_rate": 8.883832531404104e-06, "loss": 0.762, "step": 6121 }, { "epoch": 0.4309750087997184, "grad_norm": 1.516690731048584, "learning_rate": 8.882295261655058e-06, "loss": 0.6347, "step": 6122 }, { "epoch": 0.4310454065469905, "grad_norm": 1.9045478105545044, "learning_rate": 8.880757894043773e-06, "loss": 0.6524, "step": 6123 }, { "epoch": 0.4311158042942626, "grad_norm": 1.684240698814392, "learning_rate": 8.879220428650184e-06, "loss": 0.8309, "step": 6124 }, { "epoch": 0.4311862020415347, "grad_norm": 1.707969069480896, "learning_rate": 8.877682865554223e-06, "loss": 0.7309, "step": 6125 }, { "epoch": 0.43125659978880676, "grad_norm": 1.7326712608337402, "learning_rate": 8.876145204835829e-06, "loss": 0.7532, "step": 6126 }, { "epoch": 0.43132699753607884, "grad_norm": 1.7595962285995483, "learning_rate": 8.874607446574945e-06, "loss": 0.6478, "step": 6127 }, { "epoch": 0.4313973952833509, "grad_norm": 1.5578160285949707, "learning_rate": 8.873069590851524e-06, "loss": 0.6492, "step": 6128 }, { "epoch": 0.431467793030623, "grad_norm": 1.7854139804840088, "learning_rate": 8.871531637745519e-06, "loss": 0.7523, "step": 6129 }, { "epoch": 0.4315381907778951, "grad_norm": 1.899827003479004, "learning_rate": 8.869993587336887e-06, "loss": 0.6671, "step": 6130 }, { "epoch": 0.4316085885251672, "grad_norm": 1.6503262519836426, "learning_rate": 8.868455439705596e-06, "loss": 0.6296, "step": 6131 }, { "epoch": 0.4316789862724393, "grad_norm": 1.9880658388137817, "learning_rate": 8.866917194931615e-06, "loss": 0.6729, "step": 6132 }, { "epoch": 0.43174938401971136, "grad_norm": 2.0445117950439453, "learning_rate": 8.865378853094918e-06, "loss": 0.8272, "step": 6133 }, { "epoch": 0.43181978176698343, "grad_norm": 1.7034364938735962, "learning_rate": 8.863840414275486e-06, "loss": 0.6984, "step": 6134 }, { "epoch": 0.43189017951425557, "grad_norm": 2.110081434249878, "learning_rate": 8.862301878553299e-06, "loss": 0.7848, "step": 6135 }, { "epoch": 0.43196057726152765, "grad_norm": 1.6407935619354248, "learning_rate": 8.860763246008352e-06, "loss": 0.8054, "step": 6136 }, { "epoch": 0.4320309750087997, "grad_norm": 1.817347764968872, "learning_rate": 8.859224516720642e-06, "loss": 0.6305, "step": 6137 }, { "epoch": 0.4321013727560718, "grad_norm": 1.6242033243179321, "learning_rate": 8.857685690770163e-06, "loss": 0.6708, "step": 6138 }, { "epoch": 0.4321717705033439, "grad_norm": 1.7402594089508057, "learning_rate": 8.856146768236922e-06, "loss": 0.772, "step": 6139 }, { "epoch": 0.43224216825061595, "grad_norm": 1.5291111469268799, "learning_rate": 8.854607749200932e-06, "loss": 0.6241, "step": 6140 }, { "epoch": 0.4323125659978881, "grad_norm": 1.7952951192855835, "learning_rate": 8.853068633742203e-06, "loss": 0.6416, "step": 6141 }, { "epoch": 0.43238296374516016, "grad_norm": 1.6076984405517578, "learning_rate": 8.85152942194076e-06, "loss": 0.5731, "step": 6142 }, { "epoch": 0.43245336149243224, "grad_norm": 1.8722624778747559, "learning_rate": 8.849990113876623e-06, "loss": 0.6986, "step": 6143 }, { "epoch": 0.4325237592397043, "grad_norm": 2.0265331268310547, "learning_rate": 8.848450709629827e-06, "loss": 0.6838, "step": 6144 }, { "epoch": 0.4325941569869764, "grad_norm": 1.6699728965759277, "learning_rate": 8.846911209280403e-06, "loss": 0.6457, "step": 6145 }, { "epoch": 0.43266455473424853, "grad_norm": 1.6367628574371338, "learning_rate": 8.845371612908395e-06, "loss": 0.7854, "step": 6146 }, { "epoch": 0.4327349524815206, "grad_norm": 1.7140562534332275, "learning_rate": 8.843831920593842e-06, "loss": 0.6752, "step": 6147 }, { "epoch": 0.4328053502287927, "grad_norm": 1.7248181104660034, "learning_rate": 8.842292132416798e-06, "loss": 0.5691, "step": 6148 }, { "epoch": 0.43287574797606476, "grad_norm": 2.124762535095215, "learning_rate": 8.84075224845732e-06, "loss": 0.6681, "step": 6149 }, { "epoch": 0.43294614572333684, "grad_norm": 1.7293627262115479, "learning_rate": 8.839212268795463e-06, "loss": 0.7443, "step": 6150 }, { "epoch": 0.4330165434706089, "grad_norm": 1.7863357067108154, "learning_rate": 8.837672193511294e-06, "loss": 0.6799, "step": 6151 }, { "epoch": 0.43308694121788105, "grad_norm": 1.714564323425293, "learning_rate": 8.836132022684885e-06, "loss": 0.6033, "step": 6152 }, { "epoch": 0.4331573389651531, "grad_norm": 1.675366759300232, "learning_rate": 8.834591756396311e-06, "loss": 0.6785, "step": 6153 }, { "epoch": 0.4332277367124252, "grad_norm": 2.0534327030181885, "learning_rate": 8.833051394725647e-06, "loss": 0.7445, "step": 6154 }, { "epoch": 0.4332981344596973, "grad_norm": 1.8206974267959595, "learning_rate": 8.831510937752982e-06, "loss": 0.6548, "step": 6155 }, { "epoch": 0.43336853220696936, "grad_norm": 2.2733724117279053, "learning_rate": 8.829970385558405e-06, "loss": 0.7022, "step": 6156 }, { "epoch": 0.4334389299542415, "grad_norm": 1.9045133590698242, "learning_rate": 8.82842973822201e-06, "loss": 0.743, "step": 6157 }, { "epoch": 0.43350932770151357, "grad_norm": 1.6824142932891846, "learning_rate": 8.826888995823897e-06, "loss": 0.7145, "step": 6158 }, { "epoch": 0.43357972544878565, "grad_norm": 1.7415436506271362, "learning_rate": 8.82534815844417e-06, "loss": 0.6636, "step": 6159 }, { "epoch": 0.4336501231960577, "grad_norm": 1.649789571762085, "learning_rate": 8.82380722616294e-06, "loss": 0.7411, "step": 6160 }, { "epoch": 0.4337205209433298, "grad_norm": 1.790529727935791, "learning_rate": 8.82226619906032e-06, "loss": 0.6967, "step": 6161 }, { "epoch": 0.4337909186906019, "grad_norm": 1.8433711528778076, "learning_rate": 8.820725077216432e-06, "loss": 0.8625, "step": 6162 }, { "epoch": 0.433861316437874, "grad_norm": 2.1198556423187256, "learning_rate": 8.819183860711395e-06, "loss": 0.7321, "step": 6163 }, { "epoch": 0.4339317141851461, "grad_norm": 1.6895127296447754, "learning_rate": 8.817642549625344e-06, "loss": 0.6978, "step": 6164 }, { "epoch": 0.43400211193241817, "grad_norm": 1.7161409854888916, "learning_rate": 8.81610114403841e-06, "loss": 0.7664, "step": 6165 }, { "epoch": 0.43407250967969024, "grad_norm": 2.1388628482818604, "learning_rate": 8.814559644030732e-06, "loss": 0.6839, "step": 6166 }, { "epoch": 0.4341429074269623, "grad_norm": 1.4826297760009766, "learning_rate": 8.813018049682454e-06, "loss": 0.6395, "step": 6167 }, { "epoch": 0.4342133051742344, "grad_norm": 2.092965841293335, "learning_rate": 8.81147636107373e-06, "loss": 0.7017, "step": 6168 }, { "epoch": 0.43428370292150653, "grad_norm": 1.6289697885513306, "learning_rate": 8.809934578284704e-06, "loss": 0.7844, "step": 6169 }, { "epoch": 0.4343541006687786, "grad_norm": 1.835240125656128, "learning_rate": 8.808392701395544e-06, "loss": 0.6542, "step": 6170 }, { "epoch": 0.4344244984160507, "grad_norm": 1.7020857334136963, "learning_rate": 8.806850730486406e-06, "loss": 0.5814, "step": 6171 }, { "epoch": 0.43449489616332276, "grad_norm": 1.4651219844818115, "learning_rate": 8.805308665637466e-06, "loss": 0.5753, "step": 6172 }, { "epoch": 0.43456529391059484, "grad_norm": 1.9983229637145996, "learning_rate": 8.803766506928892e-06, "loss": 0.6336, "step": 6173 }, { "epoch": 0.434635691657867, "grad_norm": 1.797161340713501, "learning_rate": 8.802224254440863e-06, "loss": 0.6261, "step": 6174 }, { "epoch": 0.43470608940513905, "grad_norm": 1.464009404182434, "learning_rate": 8.800681908253561e-06, "loss": 0.7279, "step": 6175 }, { "epoch": 0.43477648715241113, "grad_norm": 1.6144942045211792, "learning_rate": 8.79913946844718e-06, "loss": 0.8033, "step": 6176 }, { "epoch": 0.4348468848996832, "grad_norm": 2.436687707901001, "learning_rate": 8.797596935101906e-06, "loss": 0.7107, "step": 6177 }, { "epoch": 0.4349172826469553, "grad_norm": 1.7799311876296997, "learning_rate": 8.79605430829794e-06, "loss": 0.7217, "step": 6178 }, { "epoch": 0.43498768039422736, "grad_norm": 1.8447139263153076, "learning_rate": 8.794511588115486e-06, "loss": 0.7565, "step": 6179 }, { "epoch": 0.4350580781414995, "grad_norm": 2.0695087909698486, "learning_rate": 8.792968774634747e-06, "loss": 0.8144, "step": 6180 }, { "epoch": 0.43512847588877157, "grad_norm": 1.5516719818115234, "learning_rate": 8.791425867935935e-06, "loss": 0.7746, "step": 6181 }, { "epoch": 0.43519887363604365, "grad_norm": 1.815410852432251, "learning_rate": 8.789882868099274e-06, "loss": 0.6386, "step": 6182 }, { "epoch": 0.4352692713833157, "grad_norm": 1.8287253379821777, "learning_rate": 8.78833977520498e-06, "loss": 0.7354, "step": 6183 }, { "epoch": 0.4353396691305878, "grad_norm": 1.6869179010391235, "learning_rate": 8.786796589333283e-06, "loss": 0.8252, "step": 6184 }, { "epoch": 0.43541006687785994, "grad_norm": 2.0918076038360596, "learning_rate": 8.78525331056441e-06, "loss": 0.6715, "step": 6185 }, { "epoch": 0.435480464625132, "grad_norm": 1.736013412475586, "learning_rate": 8.783709938978602e-06, "loss": 0.9359, "step": 6186 }, { "epoch": 0.4355508623724041, "grad_norm": 2.161222457885742, "learning_rate": 8.782166474656099e-06, "loss": 0.7305, "step": 6187 }, { "epoch": 0.43562126011967617, "grad_norm": 1.958014965057373, "learning_rate": 8.780622917677149e-06, "loss": 0.751, "step": 6188 }, { "epoch": 0.43569165786694825, "grad_norm": 1.7071806192398071, "learning_rate": 8.779079268121997e-06, "loss": 0.6508, "step": 6189 }, { "epoch": 0.4357620556142203, "grad_norm": 1.9760563373565674, "learning_rate": 8.777535526070904e-06, "loss": 0.6111, "step": 6190 }, { "epoch": 0.43583245336149246, "grad_norm": 1.7810077667236328, "learning_rate": 8.775991691604132e-06, "loss": 0.7323, "step": 6191 }, { "epoch": 0.43590285110876453, "grad_norm": 1.7099894285202026, "learning_rate": 8.77444776480194e-06, "loss": 0.6888, "step": 6192 }, { "epoch": 0.4359732488560366, "grad_norm": 1.7208329439163208, "learning_rate": 8.772903745744604e-06, "loss": 0.6635, "step": 6193 }, { "epoch": 0.4360436466033087, "grad_norm": 1.6958746910095215, "learning_rate": 8.771359634512396e-06, "loss": 0.6564, "step": 6194 }, { "epoch": 0.43611404435058077, "grad_norm": 1.8377577066421509, "learning_rate": 8.769815431185596e-06, "loss": 0.8092, "step": 6195 }, { "epoch": 0.43618444209785284, "grad_norm": 2.0569870471954346, "learning_rate": 8.768271135844489e-06, "loss": 0.8128, "step": 6196 }, { "epoch": 0.436254839845125, "grad_norm": 3.2467615604400635, "learning_rate": 8.766726748569365e-06, "loss": 0.5876, "step": 6197 }, { "epoch": 0.43632523759239705, "grad_norm": 1.8247281312942505, "learning_rate": 8.765182269440514e-06, "loss": 0.6893, "step": 6198 }, { "epoch": 0.43639563533966913, "grad_norm": 1.9285614490509033, "learning_rate": 8.76363769853824e-06, "loss": 0.7022, "step": 6199 }, { "epoch": 0.4364660330869412, "grad_norm": 1.653703212738037, "learning_rate": 8.762093035942848e-06, "loss": 0.7721, "step": 6200 }, { "epoch": 0.4365364308342133, "grad_norm": 2.4136390686035156, "learning_rate": 8.76054828173464e-06, "loss": 0.7967, "step": 6201 }, { "epoch": 0.4366068285814854, "grad_norm": 1.7674696445465088, "learning_rate": 8.759003435993933e-06, "loss": 0.6209, "step": 6202 }, { "epoch": 0.4366772263287575, "grad_norm": 1.9998570680618286, "learning_rate": 8.757458498801045e-06, "loss": 0.6601, "step": 6203 }, { "epoch": 0.4367476240760296, "grad_norm": 1.915947437286377, "learning_rate": 8.755913470236296e-06, "loss": 0.7241, "step": 6204 }, { "epoch": 0.43681802182330165, "grad_norm": 2.2826759815216064, "learning_rate": 8.754368350380018e-06, "loss": 0.7285, "step": 6205 }, { "epoch": 0.4368884195705737, "grad_norm": 1.9468077421188354, "learning_rate": 8.752823139312539e-06, "loss": 0.6244, "step": 6206 }, { "epoch": 0.4369588173178458, "grad_norm": 2.063021183013916, "learning_rate": 8.751277837114197e-06, "loss": 0.745, "step": 6207 }, { "epoch": 0.43702921506511794, "grad_norm": 1.9307734966278076, "learning_rate": 8.749732443865337e-06, "loss": 0.8397, "step": 6208 }, { "epoch": 0.43709961281239, "grad_norm": 2.016983985900879, "learning_rate": 8.7481869596463e-06, "loss": 0.6439, "step": 6209 }, { "epoch": 0.4371700105596621, "grad_norm": 1.8454474210739136, "learning_rate": 8.746641384537441e-06, "loss": 0.66, "step": 6210 }, { "epoch": 0.43724040830693417, "grad_norm": 1.5935415029525757, "learning_rate": 8.745095718619114e-06, "loss": 0.687, "step": 6211 }, { "epoch": 0.43731080605420625, "grad_norm": 2.3259243965148926, "learning_rate": 8.74354996197168e-06, "loss": 0.742, "step": 6212 }, { "epoch": 0.4373812038014784, "grad_norm": 2.0234336853027344, "learning_rate": 8.742004114675504e-06, "loss": 0.7989, "step": 6213 }, { "epoch": 0.43745160154875046, "grad_norm": 1.6545205116271973, "learning_rate": 8.740458176810957e-06, "loss": 0.6575, "step": 6214 }, { "epoch": 0.43752199929602253, "grad_norm": 2.2341396808624268, "learning_rate": 8.738912148458414e-06, "loss": 0.7591, "step": 6215 }, { "epoch": 0.4375923970432946, "grad_norm": 1.9738550186157227, "learning_rate": 8.737366029698251e-06, "loss": 0.6566, "step": 6216 }, { "epoch": 0.4376627947905667, "grad_norm": 1.658828854560852, "learning_rate": 8.735819820610856e-06, "loss": 0.5638, "step": 6217 }, { "epoch": 0.43773319253783877, "grad_norm": 1.6319608688354492, "learning_rate": 8.734273521276618e-06, "loss": 0.5819, "step": 6218 }, { "epoch": 0.4378035902851109, "grad_norm": 2.2767651081085205, "learning_rate": 8.732727131775928e-06, "loss": 0.7466, "step": 6219 }, { "epoch": 0.437873988032383, "grad_norm": 1.8303794860839844, "learning_rate": 8.731180652189185e-06, "loss": 0.6441, "step": 6220 }, { "epoch": 0.43794438577965505, "grad_norm": 1.9100191593170166, "learning_rate": 8.729634082596791e-06, "loss": 0.7135, "step": 6221 }, { "epoch": 0.43801478352692713, "grad_norm": 1.7303392887115479, "learning_rate": 8.728087423079155e-06, "loss": 0.6768, "step": 6222 }, { "epoch": 0.4380851812741992, "grad_norm": 1.9334133863449097, "learning_rate": 8.726540673716692e-06, "loss": 0.77, "step": 6223 }, { "epoch": 0.4381555790214713, "grad_norm": 1.8955438137054443, "learning_rate": 8.724993834589813e-06, "loss": 0.6964, "step": 6224 }, { "epoch": 0.4382259767687434, "grad_norm": 1.7639063596725464, "learning_rate": 8.723446905778942e-06, "loss": 0.6766, "step": 6225 }, { "epoch": 0.4382963745160155, "grad_norm": 1.8527770042419434, "learning_rate": 8.721899887364505e-06, "loss": 0.6673, "step": 6226 }, { "epoch": 0.4383667722632876, "grad_norm": 1.8190735578536987, "learning_rate": 8.720352779426937e-06, "loss": 0.7574, "step": 6227 }, { "epoch": 0.43843717001055965, "grad_norm": 1.8899558782577515, "learning_rate": 8.718805582046666e-06, "loss": 0.7401, "step": 6228 }, { "epoch": 0.43850756775783173, "grad_norm": 1.6029268503189087, "learning_rate": 8.717258295304138e-06, "loss": 0.6922, "step": 6229 }, { "epoch": 0.43857796550510386, "grad_norm": 2.028824806213379, "learning_rate": 8.715710919279795e-06, "loss": 0.5881, "step": 6230 }, { "epoch": 0.43864836325237594, "grad_norm": 1.931077003479004, "learning_rate": 8.714163454054087e-06, "loss": 0.6717, "step": 6231 }, { "epoch": 0.438718760999648, "grad_norm": 3.9757111072540283, "learning_rate": 8.71261589970747e-06, "loss": 0.7236, "step": 6232 }, { "epoch": 0.4387891587469201, "grad_norm": 2.1985251903533936, "learning_rate": 8.7110682563204e-06, "loss": 0.5547, "step": 6233 }, { "epoch": 0.43885955649419217, "grad_norm": 1.7094374895095825, "learning_rate": 8.709520523973344e-06, "loss": 0.5994, "step": 6234 }, { "epoch": 0.43892995424146425, "grad_norm": 1.8150886297225952, "learning_rate": 8.707972702746767e-06, "loss": 0.6586, "step": 6235 }, { "epoch": 0.4390003519887364, "grad_norm": 2.054304838180542, "learning_rate": 8.70642479272114e-06, "loss": 0.7881, "step": 6236 }, { "epoch": 0.43907074973600846, "grad_norm": 2.230776786804199, "learning_rate": 8.704876793976943e-06, "loss": 0.6256, "step": 6237 }, { "epoch": 0.43914114748328054, "grad_norm": 1.8628860712051392, "learning_rate": 8.703328706594658e-06, "loss": 0.6026, "step": 6238 }, { "epoch": 0.4392115452305526, "grad_norm": 2.396927833557129, "learning_rate": 8.701780530654773e-06, "loss": 0.753, "step": 6239 }, { "epoch": 0.4392819429778247, "grad_norm": 1.949360728263855, "learning_rate": 8.700232266237776e-06, "loss": 0.7076, "step": 6240 }, { "epoch": 0.4393523407250968, "grad_norm": 4.056278705596924, "learning_rate": 8.698683913424161e-06, "loss": 0.6346, "step": 6241 }, { "epoch": 0.4394227384723689, "grad_norm": 1.9052281379699707, "learning_rate": 8.697135472294435e-06, "loss": 0.7372, "step": 6242 }, { "epoch": 0.439493136219641, "grad_norm": 1.9019471406936646, "learning_rate": 8.695586942929097e-06, "loss": 0.6892, "step": 6243 }, { "epoch": 0.43956353396691306, "grad_norm": 3.5141758918762207, "learning_rate": 8.694038325408658e-06, "loss": 0.6995, "step": 6244 }, { "epoch": 0.43963393171418513, "grad_norm": 1.7422585487365723, "learning_rate": 8.692489619813634e-06, "loss": 0.7901, "step": 6245 }, { "epoch": 0.4397043294614572, "grad_norm": 1.967261791229248, "learning_rate": 8.690940826224539e-06, "loss": 0.5889, "step": 6246 }, { "epoch": 0.43977472720872934, "grad_norm": 1.736568570137024, "learning_rate": 8.6893919447219e-06, "loss": 0.6944, "step": 6247 }, { "epoch": 0.4398451249560014, "grad_norm": 1.8167866468429565, "learning_rate": 8.687842975386245e-06, "loss": 0.6288, "step": 6248 }, { "epoch": 0.4399155227032735, "grad_norm": 1.6752080917358398, "learning_rate": 8.686293918298104e-06, "loss": 0.6355, "step": 6249 }, { "epoch": 0.4399859204505456, "grad_norm": 1.7385259866714478, "learning_rate": 8.684744773538015e-06, "loss": 0.704, "step": 6250 }, { "epoch": 0.44005631819781765, "grad_norm": 1.775976538658142, "learning_rate": 8.68319554118652e-06, "loss": 0.7186, "step": 6251 }, { "epoch": 0.44012671594508973, "grad_norm": 1.6073647737503052, "learning_rate": 8.681646221324164e-06, "loss": 0.6258, "step": 6252 }, { "epoch": 0.44019711369236186, "grad_norm": 2.1105713844299316, "learning_rate": 8.680096814031498e-06, "loss": 0.8243, "step": 6253 }, { "epoch": 0.44026751143963394, "grad_norm": 1.931691288948059, "learning_rate": 8.678547319389077e-06, "loss": 0.8118, "step": 6254 }, { "epoch": 0.440337909186906, "grad_norm": 1.8456681966781616, "learning_rate": 8.676997737477462e-06, "loss": 0.6716, "step": 6255 }, { "epoch": 0.4404083069341781, "grad_norm": 1.7427514791488647, "learning_rate": 8.675448068377212e-06, "loss": 0.5033, "step": 6256 }, { "epoch": 0.4404787046814502, "grad_norm": 3.2757933139801025, "learning_rate": 8.673898312168905e-06, "loss": 0.7578, "step": 6257 }, { "epoch": 0.4405491024287223, "grad_norm": 1.8631789684295654, "learning_rate": 8.672348468933104e-06, "loss": 0.6666, "step": 6258 }, { "epoch": 0.4406195001759944, "grad_norm": 2.0338592529296875, "learning_rate": 8.670798538750395e-06, "loss": 0.7577, "step": 6259 }, { "epoch": 0.44068989792326646, "grad_norm": 2.0199193954467773, "learning_rate": 8.669248521701354e-06, "loss": 0.7247, "step": 6260 }, { "epoch": 0.44076029567053854, "grad_norm": 2.318453073501587, "learning_rate": 8.667698417866574e-06, "loss": 0.8233, "step": 6261 }, { "epoch": 0.4408306934178106, "grad_norm": 1.9138137102127075, "learning_rate": 8.666148227326638e-06, "loss": 0.6847, "step": 6262 }, { "epoch": 0.4409010911650827, "grad_norm": 2.1254448890686035, "learning_rate": 8.664597950162151e-06, "loss": 0.7145, "step": 6263 }, { "epoch": 0.4409714889123548, "grad_norm": 2.4283809661865234, "learning_rate": 8.663047586453707e-06, "loss": 0.6722, "step": 6264 }, { "epoch": 0.4410418866596269, "grad_norm": 2.234814167022705, "learning_rate": 8.661497136281913e-06, "loss": 0.6115, "step": 6265 }, { "epoch": 0.441112284406899, "grad_norm": 1.84239661693573, "learning_rate": 8.65994659972738e-06, "loss": 0.6728, "step": 6266 }, { "epoch": 0.44118268215417106, "grad_norm": 1.8301811218261719, "learning_rate": 8.658395976870717e-06, "loss": 0.6392, "step": 6267 }, { "epoch": 0.44125307990144313, "grad_norm": 1.8437247276306152, "learning_rate": 8.656845267792547e-06, "loss": 0.6205, "step": 6268 }, { "epoch": 0.44132347764871527, "grad_norm": 1.5435819625854492, "learning_rate": 8.655294472573492e-06, "loss": 0.7961, "step": 6269 }, { "epoch": 0.44139387539598735, "grad_norm": 2.167980670928955, "learning_rate": 8.653743591294177e-06, "loss": 0.8406, "step": 6270 }, { "epoch": 0.4414642731432594, "grad_norm": 2.0197601318359375, "learning_rate": 8.652192624035233e-06, "loss": 0.722, "step": 6271 }, { "epoch": 0.4415346708905315, "grad_norm": 1.7140681743621826, "learning_rate": 8.650641570877303e-06, "loss": 0.6731, "step": 6272 }, { "epoch": 0.4416050686378036, "grad_norm": 2.046846628189087, "learning_rate": 8.649090431901018e-06, "loss": 0.7416, "step": 6273 }, { "epoch": 0.44167546638507565, "grad_norm": 2.326343059539795, "learning_rate": 8.64753920718703e-06, "loss": 0.6598, "step": 6274 }, { "epoch": 0.4417458641323478, "grad_norm": 1.9007511138916016, "learning_rate": 8.645987896815987e-06, "loss": 0.6536, "step": 6275 }, { "epoch": 0.44181626187961986, "grad_norm": 1.9517228603363037, "learning_rate": 8.644436500868542e-06, "loss": 0.7142, "step": 6276 }, { "epoch": 0.44188665962689194, "grad_norm": 2.384875774383545, "learning_rate": 8.642885019425353e-06, "loss": 0.6945, "step": 6277 }, { "epoch": 0.441957057374164, "grad_norm": 1.9869259595870972, "learning_rate": 8.641333452567085e-06, "loss": 0.6748, "step": 6278 }, { "epoch": 0.4420274551214361, "grad_norm": 2.2182424068450928, "learning_rate": 8.639781800374405e-06, "loss": 0.7776, "step": 6279 }, { "epoch": 0.4420978528687082, "grad_norm": 1.994261622428894, "learning_rate": 8.638230062927982e-06, "loss": 0.6463, "step": 6280 }, { "epoch": 0.4421682506159803, "grad_norm": 2.00596284866333, "learning_rate": 8.636678240308497e-06, "loss": 0.676, "step": 6281 }, { "epoch": 0.4422386483632524, "grad_norm": 2.538010358810425, "learning_rate": 8.635126332596627e-06, "loss": 0.5784, "step": 6282 }, { "epoch": 0.44230904611052446, "grad_norm": 1.6122359037399292, "learning_rate": 8.633574339873056e-06, "loss": 0.6722, "step": 6283 }, { "epoch": 0.44237944385779654, "grad_norm": 2.0384156703948975, "learning_rate": 8.632022262218478e-06, "loss": 0.6748, "step": 6284 }, { "epoch": 0.4424498416050686, "grad_norm": 1.9468833208084106, "learning_rate": 8.630470099713584e-06, "loss": 0.7242, "step": 6285 }, { "epoch": 0.44252023935234075, "grad_norm": 1.8797940015792847, "learning_rate": 8.628917852439074e-06, "loss": 0.7954, "step": 6286 }, { "epoch": 0.4425906370996128, "grad_norm": 2.3255364894866943, "learning_rate": 8.627365520475646e-06, "loss": 0.6649, "step": 6287 }, { "epoch": 0.4426610348468849, "grad_norm": 1.652653694152832, "learning_rate": 8.625813103904014e-06, "loss": 0.6415, "step": 6288 }, { "epoch": 0.442731432594157, "grad_norm": 1.8098775148391724, "learning_rate": 8.624260602804887e-06, "loss": 0.6064, "step": 6289 }, { "epoch": 0.44280183034142906, "grad_norm": 1.9257032871246338, "learning_rate": 8.622708017258977e-06, "loss": 0.6662, "step": 6290 }, { "epoch": 0.44287222808870114, "grad_norm": 1.704490303993225, "learning_rate": 8.62115534734701e-06, "loss": 0.6943, "step": 6291 }, { "epoch": 0.44294262583597327, "grad_norm": 1.9758707284927368, "learning_rate": 8.619602593149705e-06, "loss": 0.7102, "step": 6292 }, { "epoch": 0.44301302358324535, "grad_norm": 2.10601544380188, "learning_rate": 8.618049754747798e-06, "loss": 0.7238, "step": 6293 }, { "epoch": 0.4430834213305174, "grad_norm": 1.9898911714553833, "learning_rate": 8.616496832222019e-06, "loss": 0.7894, "step": 6294 }, { "epoch": 0.4431538190777895, "grad_norm": 2.048314094543457, "learning_rate": 8.614943825653104e-06, "loss": 0.5628, "step": 6295 }, { "epoch": 0.4432242168250616, "grad_norm": 1.7632935047149658, "learning_rate": 8.613390735121798e-06, "loss": 0.6755, "step": 6296 }, { "epoch": 0.4432946145723337, "grad_norm": 2.011080741882324, "learning_rate": 8.611837560708846e-06, "loss": 0.7195, "step": 6297 }, { "epoch": 0.4433650123196058, "grad_norm": 1.9133890867233276, "learning_rate": 8.610284302494999e-06, "loss": 0.6547, "step": 6298 }, { "epoch": 0.44343541006687787, "grad_norm": 1.8197356462478638, "learning_rate": 8.608730960561014e-06, "loss": 0.5901, "step": 6299 }, { "epoch": 0.44350580781414994, "grad_norm": 1.7483916282653809, "learning_rate": 8.607177534987648e-06, "loss": 0.6542, "step": 6300 }, { "epoch": 0.443576205561422, "grad_norm": 1.5346592664718628, "learning_rate": 8.605624025855667e-06, "loss": 0.619, "step": 6301 }, { "epoch": 0.4436466033086941, "grad_norm": 1.5495575666427612, "learning_rate": 8.60407043324584e-06, "loss": 0.6812, "step": 6302 }, { "epoch": 0.44371700105596623, "grad_norm": 1.821560025215149, "learning_rate": 8.602516757238937e-06, "loss": 0.6557, "step": 6303 }, { "epoch": 0.4437873988032383, "grad_norm": 1.5768433809280396, "learning_rate": 8.600962997915736e-06, "loss": 0.6702, "step": 6304 }, { "epoch": 0.4438577965505104, "grad_norm": 2.2786896228790283, "learning_rate": 8.599409155357019e-06, "loss": 0.607, "step": 6305 }, { "epoch": 0.44392819429778246, "grad_norm": 1.8299150466918945, "learning_rate": 8.597855229643573e-06, "loss": 0.622, "step": 6306 }, { "epoch": 0.44399859204505454, "grad_norm": 2.1357901096343994, "learning_rate": 8.596301220856184e-06, "loss": 0.8005, "step": 6307 }, { "epoch": 0.4440689897923266, "grad_norm": 1.7597311735153198, "learning_rate": 8.594747129075652e-06, "loss": 0.6725, "step": 6308 }, { "epoch": 0.44413938753959875, "grad_norm": 1.4086155891418457, "learning_rate": 8.593192954382771e-06, "loss": 0.5835, "step": 6309 }, { "epoch": 0.44420978528687083, "grad_norm": 1.9376952648162842, "learning_rate": 8.591638696858344e-06, "loss": 0.6384, "step": 6310 }, { "epoch": 0.4442801830341429, "grad_norm": 1.7554107904434204, "learning_rate": 8.590084356583182e-06, "loss": 0.6891, "step": 6311 }, { "epoch": 0.444350580781415, "grad_norm": 1.856407642364502, "learning_rate": 8.588529933638092e-06, "loss": 0.6871, "step": 6312 }, { "epoch": 0.44442097852868706, "grad_norm": 1.7802973985671997, "learning_rate": 8.586975428103893e-06, "loss": 0.648, "step": 6313 }, { "epoch": 0.4444913762759592, "grad_norm": 1.806768774986267, "learning_rate": 8.585420840061408e-06, "loss": 0.73, "step": 6314 }, { "epoch": 0.44456177402323127, "grad_norm": 1.7104696035385132, "learning_rate": 8.583866169591452e-06, "loss": 0.591, "step": 6315 }, { "epoch": 0.44463217177050335, "grad_norm": 1.682559609413147, "learning_rate": 8.582311416774861e-06, "loss": 0.6968, "step": 6316 }, { "epoch": 0.4447025695177754, "grad_norm": 1.585602045059204, "learning_rate": 8.580756581692467e-06, "loss": 0.5866, "step": 6317 }, { "epoch": 0.4447729672650475, "grad_norm": 1.6496354341506958, "learning_rate": 8.579201664425107e-06, "loss": 0.7452, "step": 6318 }, { "epoch": 0.4448433650123196, "grad_norm": 1.7225035429000854, "learning_rate": 8.577646665053622e-06, "loss": 0.7174, "step": 6319 }, { "epoch": 0.4449137627595917, "grad_norm": 1.8713361024856567, "learning_rate": 8.576091583658858e-06, "loss": 0.681, "step": 6320 }, { "epoch": 0.4449841605068638, "grad_norm": 1.70640230178833, "learning_rate": 8.574536420321666e-06, "loss": 0.6069, "step": 6321 }, { "epoch": 0.44505455825413587, "grad_norm": 1.5169999599456787, "learning_rate": 8.572981175122898e-06, "loss": 0.6365, "step": 6322 }, { "epoch": 0.44512495600140795, "grad_norm": 1.6917929649353027, "learning_rate": 8.571425848143417e-06, "loss": 0.6742, "step": 6323 }, { "epoch": 0.44519535374868, "grad_norm": 1.9512450695037842, "learning_rate": 8.56987043946408e-06, "loss": 0.708, "step": 6324 }, { "epoch": 0.44526575149595216, "grad_norm": 1.7476052045822144, "learning_rate": 8.568314949165756e-06, "loss": 0.629, "step": 6325 }, { "epoch": 0.44533614924322423, "grad_norm": 2.006992816925049, "learning_rate": 8.56675937732932e-06, "loss": 0.6352, "step": 6326 }, { "epoch": 0.4454065469904963, "grad_norm": 1.708809733390808, "learning_rate": 8.565203724035646e-06, "loss": 0.6938, "step": 6327 }, { "epoch": 0.4454769447377684, "grad_norm": 1.7910109758377075, "learning_rate": 8.56364798936561e-06, "loss": 0.6868, "step": 6328 }, { "epoch": 0.44554734248504047, "grad_norm": 2.2826085090637207, "learning_rate": 8.562092173400101e-06, "loss": 0.7186, "step": 6329 }, { "epoch": 0.44561774023231254, "grad_norm": 1.7749546766281128, "learning_rate": 8.560536276220003e-06, "loss": 0.5383, "step": 6330 }, { "epoch": 0.4456881379795847, "grad_norm": 3.45001482963562, "learning_rate": 8.558980297906211e-06, "loss": 0.8355, "step": 6331 }, { "epoch": 0.44575853572685675, "grad_norm": 1.9786021709442139, "learning_rate": 8.557424238539624e-06, "loss": 0.764, "step": 6332 }, { "epoch": 0.44582893347412883, "grad_norm": 1.7486824989318848, "learning_rate": 8.555868098201139e-06, "loss": 0.7892, "step": 6333 }, { "epoch": 0.4458993312214009, "grad_norm": 2.0367226600646973, "learning_rate": 8.554311876971663e-06, "loss": 0.7702, "step": 6334 }, { "epoch": 0.445969728968673, "grad_norm": 1.8272565603256226, "learning_rate": 8.552755574932104e-06, "loss": 0.6705, "step": 6335 }, { "epoch": 0.44604012671594506, "grad_norm": 1.8792119026184082, "learning_rate": 8.551199192163376e-06, "loss": 0.7101, "step": 6336 }, { "epoch": 0.4461105244632172, "grad_norm": 1.697383999824524, "learning_rate": 8.549642728746399e-06, "loss": 0.6111, "step": 6337 }, { "epoch": 0.4461809222104893, "grad_norm": 1.8358601331710815, "learning_rate": 8.548086184762091e-06, "loss": 0.6222, "step": 6338 }, { "epoch": 0.44625131995776135, "grad_norm": 1.8389803171157837, "learning_rate": 8.546529560291381e-06, "loss": 0.7146, "step": 6339 }, { "epoch": 0.4463217177050334, "grad_norm": 1.7974299192428589, "learning_rate": 8.5449728554152e-06, "loss": 0.7153, "step": 6340 }, { "epoch": 0.4463921154523055, "grad_norm": 2.120499610900879, "learning_rate": 8.543416070214478e-06, "loss": 0.7047, "step": 6341 }, { "epoch": 0.44646251319957764, "grad_norm": 1.9050575494766235, "learning_rate": 8.541859204770159e-06, "loss": 0.7472, "step": 6342 }, { "epoch": 0.4465329109468497, "grad_norm": 1.9294236898422241, "learning_rate": 8.540302259163183e-06, "loss": 0.765, "step": 6343 }, { "epoch": 0.4466033086941218, "grad_norm": 1.7673823833465576, "learning_rate": 8.538745233474496e-06, "loss": 0.7277, "step": 6344 }, { "epoch": 0.44667370644139387, "grad_norm": 1.711005449295044, "learning_rate": 8.537188127785053e-06, "loss": 0.6169, "step": 6345 }, { "epoch": 0.44674410418866595, "grad_norm": 1.6789143085479736, "learning_rate": 8.535630942175805e-06, "loss": 0.6977, "step": 6346 }, { "epoch": 0.446814501935938, "grad_norm": 2.1826727390289307, "learning_rate": 8.534073676727715e-06, "loss": 0.7346, "step": 6347 }, { "epoch": 0.44688489968321016, "grad_norm": 1.6849604845046997, "learning_rate": 8.532516331521745e-06, "loss": 0.6874, "step": 6348 }, { "epoch": 0.44695529743048223, "grad_norm": 1.6917200088500977, "learning_rate": 8.530958906638864e-06, "loss": 0.6969, "step": 6349 }, { "epoch": 0.4470256951777543, "grad_norm": 1.5257110595703125, "learning_rate": 8.529401402160042e-06, "loss": 0.6846, "step": 6350 }, { "epoch": 0.4470960929250264, "grad_norm": 1.8488295078277588, "learning_rate": 8.527843818166255e-06, "loss": 0.8158, "step": 6351 }, { "epoch": 0.44716649067229847, "grad_norm": 2.1414730548858643, "learning_rate": 8.526286154738484e-06, "loss": 0.6798, "step": 6352 }, { "epoch": 0.4472368884195706, "grad_norm": 1.8995546102523804, "learning_rate": 8.524728411957716e-06, "loss": 0.6346, "step": 6353 }, { "epoch": 0.4473072861668427, "grad_norm": 3.473501205444336, "learning_rate": 8.523170589904937e-06, "loss": 0.6337, "step": 6354 }, { "epoch": 0.44737768391411475, "grad_norm": 1.961373209953308, "learning_rate": 8.521612688661136e-06, "loss": 0.7278, "step": 6355 }, { "epoch": 0.44744808166138683, "grad_norm": 1.9850616455078125, "learning_rate": 8.520054708307318e-06, "loss": 0.7681, "step": 6356 }, { "epoch": 0.4475184794086589, "grad_norm": 2.060985565185547, "learning_rate": 8.518496648924476e-06, "loss": 0.6727, "step": 6357 }, { "epoch": 0.447588877155931, "grad_norm": 1.8798669576644897, "learning_rate": 8.51693851059362e-06, "loss": 0.7267, "step": 6358 }, { "epoch": 0.4476592749032031, "grad_norm": 2.332091808319092, "learning_rate": 8.515380293395755e-06, "loss": 0.6715, "step": 6359 }, { "epoch": 0.4477296726504752, "grad_norm": 1.7512682676315308, "learning_rate": 8.513821997411896e-06, "loss": 0.7188, "step": 6360 }, { "epoch": 0.4478000703977473, "grad_norm": 1.8159457445144653, "learning_rate": 8.51226362272306e-06, "loss": 0.7674, "step": 6361 }, { "epoch": 0.44787046814501935, "grad_norm": 2.173820734024048, "learning_rate": 8.51070516941027e-06, "loss": 0.6559, "step": 6362 }, { "epoch": 0.44794086589229143, "grad_norm": 2.2062060832977295, "learning_rate": 8.50914663755455e-06, "loss": 0.6844, "step": 6363 }, { "epoch": 0.44801126363956356, "grad_norm": 1.7651655673980713, "learning_rate": 8.507588027236929e-06, "loss": 0.6905, "step": 6364 }, { "epoch": 0.44808166138683564, "grad_norm": 1.7876533269882202, "learning_rate": 8.506029338538443e-06, "loss": 0.6732, "step": 6365 }, { "epoch": 0.4481520591341077, "grad_norm": 1.9728195667266846, "learning_rate": 8.504470571540126e-06, "loss": 0.603, "step": 6366 }, { "epoch": 0.4482224568813798, "grad_norm": 2.5583014488220215, "learning_rate": 8.50291172632302e-06, "loss": 0.7035, "step": 6367 }, { "epoch": 0.44829285462865187, "grad_norm": 2.3197743892669678, "learning_rate": 8.501352802968176e-06, "loss": 0.6701, "step": 6368 }, { "epoch": 0.44836325237592395, "grad_norm": 1.5735934972763062, "learning_rate": 8.499793801556638e-06, "loss": 0.7646, "step": 6369 }, { "epoch": 0.4484336501231961, "grad_norm": 2.1869311332702637, "learning_rate": 8.498234722169461e-06, "loss": 0.6513, "step": 6370 }, { "epoch": 0.44850404787046816, "grad_norm": 1.8417121171951294, "learning_rate": 8.496675564887707e-06, "loss": 0.6763, "step": 6371 }, { "epoch": 0.44857444561774024, "grad_norm": 1.8099850416183472, "learning_rate": 8.495116329792432e-06, "loss": 0.5912, "step": 6372 }, { "epoch": 0.4486448433650123, "grad_norm": 1.9449288845062256, "learning_rate": 8.493557016964705e-06, "loss": 0.82, "step": 6373 }, { "epoch": 0.4487152411122844, "grad_norm": 1.7831361293792725, "learning_rate": 8.491997626485598e-06, "loss": 0.8055, "step": 6374 }, { "epoch": 0.44878563885955647, "grad_norm": 1.5528172254562378, "learning_rate": 8.490438158436181e-06, "loss": 0.6536, "step": 6375 }, { "epoch": 0.4488560366068286, "grad_norm": 1.5252323150634766, "learning_rate": 8.488878612897534e-06, "loss": 0.7649, "step": 6376 }, { "epoch": 0.4489264343541007, "grad_norm": 1.7926220893859863, "learning_rate": 8.487318989950743e-06, "loss": 0.7721, "step": 6377 }, { "epoch": 0.44899683210137276, "grad_norm": 1.6518949270248413, "learning_rate": 8.485759289676886e-06, "loss": 0.6807, "step": 6378 }, { "epoch": 0.44906722984864483, "grad_norm": 1.587640404701233, "learning_rate": 8.48419951215706e-06, "loss": 0.6509, "step": 6379 }, { "epoch": 0.4491376275959169, "grad_norm": 1.8153634071350098, "learning_rate": 8.48263965747236e-06, "loss": 0.7493, "step": 6380 }, { "epoch": 0.44920802534318904, "grad_norm": 1.5671206712722778, "learning_rate": 8.48107972570388e-06, "loss": 0.5898, "step": 6381 }, { "epoch": 0.4492784230904611, "grad_norm": 2.0299694538116455, "learning_rate": 8.47951971693272e-06, "loss": 0.5974, "step": 6382 }, { "epoch": 0.4493488208377332, "grad_norm": 1.8423376083374023, "learning_rate": 8.477959631239995e-06, "loss": 0.7736, "step": 6383 }, { "epoch": 0.4494192185850053, "grad_norm": 1.8973731994628906, "learning_rate": 8.476399468706806e-06, "loss": 0.7167, "step": 6384 }, { "epoch": 0.44948961633227735, "grad_norm": 1.476536512374878, "learning_rate": 8.474839229414275e-06, "loss": 0.7324, "step": 6385 }, { "epoch": 0.44956001407954943, "grad_norm": 1.7342517375946045, "learning_rate": 8.473278913443514e-06, "loss": 0.7485, "step": 6386 }, { "epoch": 0.44963041182682156, "grad_norm": 1.789510726928711, "learning_rate": 8.47171852087565e-06, "loss": 0.702, "step": 6387 }, { "epoch": 0.44970080957409364, "grad_norm": 1.8374449014663696, "learning_rate": 8.470158051791807e-06, "loss": 0.641, "step": 6388 }, { "epoch": 0.4497712073213657, "grad_norm": 1.8920116424560547, "learning_rate": 8.468597506273115e-06, "loss": 0.6126, "step": 6389 }, { "epoch": 0.4498416050686378, "grad_norm": 1.8065204620361328, "learning_rate": 8.467036884400708e-06, "loss": 0.6612, "step": 6390 }, { "epoch": 0.4499120028159099, "grad_norm": 1.9629923105239868, "learning_rate": 8.465476186255726e-06, "loss": 0.7503, "step": 6391 }, { "epoch": 0.449982400563182, "grad_norm": 1.4823241233825684, "learning_rate": 8.463915411919308e-06, "loss": 0.5335, "step": 6392 }, { "epoch": 0.4500527983104541, "grad_norm": 1.980201244354248, "learning_rate": 8.462354561472604e-06, "loss": 0.6848, "step": 6393 }, { "epoch": 0.45012319605772616, "grad_norm": 1.8652297258377075, "learning_rate": 8.460793634996761e-06, "loss": 0.7042, "step": 6394 }, { "epoch": 0.45019359380499824, "grad_norm": 2.0237724781036377, "learning_rate": 8.459232632572934e-06, "loss": 0.7343, "step": 6395 }, { "epoch": 0.4502639915522703, "grad_norm": 1.9426015615463257, "learning_rate": 8.45767155428228e-06, "loss": 0.7206, "step": 6396 }, { "epoch": 0.4503343892995424, "grad_norm": 1.7671769857406616, "learning_rate": 8.456110400205964e-06, "loss": 0.7301, "step": 6397 }, { "epoch": 0.4504047870468145, "grad_norm": 1.987050175666809, "learning_rate": 8.454549170425149e-06, "loss": 0.6313, "step": 6398 }, { "epoch": 0.4504751847940866, "grad_norm": 1.7860984802246094, "learning_rate": 8.452987865021003e-06, "loss": 0.629, "step": 6399 }, { "epoch": 0.4505455825413587, "grad_norm": 1.704730749130249, "learning_rate": 8.451426484074704e-06, "loss": 0.7402, "step": 6400 }, { "epoch": 0.45061598028863076, "grad_norm": 1.8385753631591797, "learning_rate": 8.449865027667426e-06, "loss": 0.824, "step": 6401 }, { "epoch": 0.45068637803590283, "grad_norm": 1.815189003944397, "learning_rate": 8.448303495880353e-06, "loss": 0.6319, "step": 6402 }, { "epoch": 0.4507567757831749, "grad_norm": 1.5959951877593994, "learning_rate": 8.44674188879467e-06, "loss": 0.72, "step": 6403 }, { "epoch": 0.45082717353044705, "grad_norm": 1.5302352905273438, "learning_rate": 8.445180206491568e-06, "loss": 0.6327, "step": 6404 }, { "epoch": 0.4508975712777191, "grad_norm": 1.7185523509979248, "learning_rate": 8.443618449052234e-06, "loss": 0.7544, "step": 6405 }, { "epoch": 0.4509679690249912, "grad_norm": 1.740355372428894, "learning_rate": 8.442056616557872e-06, "loss": 0.5326, "step": 6406 }, { "epoch": 0.4510383667722633, "grad_norm": 1.643223524093628, "learning_rate": 8.440494709089681e-06, "loss": 0.7458, "step": 6407 }, { "epoch": 0.45110876451953535, "grad_norm": 1.7322330474853516, "learning_rate": 8.438932726728864e-06, "loss": 0.6826, "step": 6408 }, { "epoch": 0.4511791622668075, "grad_norm": 1.7802765369415283, "learning_rate": 8.43737066955663e-06, "loss": 0.7513, "step": 6409 }, { "epoch": 0.45124956001407956, "grad_norm": 1.9850162267684937, "learning_rate": 8.435808537654197e-06, "loss": 0.7435, "step": 6410 }, { "epoch": 0.45131995776135164, "grad_norm": 1.7720022201538086, "learning_rate": 8.434246331102774e-06, "loss": 0.6572, "step": 6411 }, { "epoch": 0.4513903555086237, "grad_norm": 1.5988835096359253, "learning_rate": 8.432684049983588e-06, "loss": 0.7137, "step": 6412 }, { "epoch": 0.4514607532558958, "grad_norm": 1.958312749862671, "learning_rate": 8.43112169437786e-06, "loss": 0.7156, "step": 6413 }, { "epoch": 0.4515311510031679, "grad_norm": 1.6222649812698364, "learning_rate": 8.429559264366819e-06, "loss": 0.6859, "step": 6414 }, { "epoch": 0.45160154875044, "grad_norm": 2.391329050064087, "learning_rate": 8.427996760031697e-06, "loss": 0.5955, "step": 6415 }, { "epoch": 0.4516719464977121, "grad_norm": 1.5073379278182983, "learning_rate": 8.42643418145373e-06, "loss": 0.6563, "step": 6416 }, { "epoch": 0.45174234424498416, "grad_norm": 1.6070226430892944, "learning_rate": 8.42487152871416e-06, "loss": 0.5699, "step": 6417 }, { "epoch": 0.45181274199225624, "grad_norm": 1.7635143995285034, "learning_rate": 8.423308801894226e-06, "loss": 0.6764, "step": 6418 }, { "epoch": 0.4518831397395283, "grad_norm": 1.908990502357483, "learning_rate": 8.42174600107518e-06, "loss": 0.795, "step": 6419 }, { "epoch": 0.45195353748680045, "grad_norm": 1.9450206756591797, "learning_rate": 8.42018312633827e-06, "loss": 0.7642, "step": 6420 }, { "epoch": 0.4520239352340725, "grad_norm": 2.3734629154205322, "learning_rate": 8.418620177764754e-06, "loss": 0.736, "step": 6421 }, { "epoch": 0.4520943329813446, "grad_norm": 1.9147963523864746, "learning_rate": 8.417057155435892e-06, "loss": 0.5745, "step": 6422 }, { "epoch": 0.4521647307286167, "grad_norm": 1.5973215103149414, "learning_rate": 8.415494059432942e-06, "loss": 0.6884, "step": 6423 }, { "epoch": 0.45223512847588876, "grad_norm": 2.3663952350616455, "learning_rate": 8.413930889837176e-06, "loss": 0.565, "step": 6424 }, { "epoch": 0.45230552622316084, "grad_norm": 1.901071310043335, "learning_rate": 8.412367646729863e-06, "loss": 0.7204, "step": 6425 }, { "epoch": 0.45237592397043297, "grad_norm": 1.717690348625183, "learning_rate": 8.410804330192274e-06, "loss": 0.6646, "step": 6426 }, { "epoch": 0.45244632171770505, "grad_norm": 1.8056989908218384, "learning_rate": 8.409240940305693e-06, "loss": 0.6593, "step": 6427 }, { "epoch": 0.4525167194649771, "grad_norm": 1.6623942852020264, "learning_rate": 8.407677477151397e-06, "loss": 0.6696, "step": 6428 }, { "epoch": 0.4525871172122492, "grad_norm": 2.371659755706787, "learning_rate": 8.406113940810677e-06, "loss": 0.7437, "step": 6429 }, { "epoch": 0.4526575149595213, "grad_norm": 1.6212918758392334, "learning_rate": 8.404550331364817e-06, "loss": 0.6973, "step": 6430 }, { "epoch": 0.45272791270679336, "grad_norm": 1.8169225454330444, "learning_rate": 8.402986648895114e-06, "loss": 0.7012, "step": 6431 }, { "epoch": 0.4527983104540655, "grad_norm": 1.6850754022598267, "learning_rate": 8.401422893482866e-06, "loss": 0.7097, "step": 6432 }, { "epoch": 0.45286870820133757, "grad_norm": 2.287299394607544, "learning_rate": 8.39985906520937e-06, "loss": 0.6857, "step": 6433 }, { "epoch": 0.45293910594860964, "grad_norm": 1.63023042678833, "learning_rate": 8.398295164155936e-06, "loss": 0.6375, "step": 6434 }, { "epoch": 0.4530095036958817, "grad_norm": 1.7578952312469482, "learning_rate": 8.396731190403869e-06, "loss": 0.7582, "step": 6435 }, { "epoch": 0.4530799014431538, "grad_norm": 1.8032450675964355, "learning_rate": 8.395167144034482e-06, "loss": 0.8093, "step": 6436 }, { "epoch": 0.45315029919042593, "grad_norm": 1.8843311071395874, "learning_rate": 8.393603025129091e-06, "loss": 0.6403, "step": 6437 }, { "epoch": 0.453220696937698, "grad_norm": 1.5403364896774292, "learning_rate": 8.392038833769017e-06, "loss": 0.697, "step": 6438 }, { "epoch": 0.4532910946849701, "grad_norm": 1.7118875980377197, "learning_rate": 8.390474570035585e-06, "loss": 0.7233, "step": 6439 }, { "epoch": 0.45336149243224216, "grad_norm": 1.9960507154464722, "learning_rate": 8.388910234010119e-06, "loss": 0.6329, "step": 6440 }, { "epoch": 0.45343189017951424, "grad_norm": 1.6616812944412231, "learning_rate": 8.387345825773952e-06, "loss": 0.7002, "step": 6441 }, { "epoch": 0.4535022879267863, "grad_norm": 1.7728782892227173, "learning_rate": 8.385781345408421e-06, "loss": 0.7576, "step": 6442 }, { "epoch": 0.45357268567405845, "grad_norm": 1.7087082862854004, "learning_rate": 8.384216792994861e-06, "loss": 0.685, "step": 6443 }, { "epoch": 0.45364308342133053, "grad_norm": 2.1502537727355957, "learning_rate": 8.382652168614618e-06, "loss": 0.6103, "step": 6444 }, { "epoch": 0.4537134811686026, "grad_norm": 1.834885597229004, "learning_rate": 8.381087472349036e-06, "loss": 0.8068, "step": 6445 }, { "epoch": 0.4537838789158747, "grad_norm": 1.999954104423523, "learning_rate": 8.379522704279464e-06, "loss": 0.7845, "step": 6446 }, { "epoch": 0.45385427666314676, "grad_norm": 1.889430046081543, "learning_rate": 8.377957864487258e-06, "loss": 0.8001, "step": 6447 }, { "epoch": 0.4539246744104189, "grad_norm": 2.2373530864715576, "learning_rate": 8.376392953053776e-06, "loss": 0.7459, "step": 6448 }, { "epoch": 0.45399507215769097, "grad_norm": 2.1885812282562256, "learning_rate": 8.374827970060377e-06, "loss": 0.7705, "step": 6449 }, { "epoch": 0.45406546990496305, "grad_norm": 1.7674075365066528, "learning_rate": 8.373262915588426e-06, "loss": 0.6809, "step": 6450 }, { "epoch": 0.4541358676522351, "grad_norm": 1.7615021467208862, "learning_rate": 8.371697789719295e-06, "loss": 0.6719, "step": 6451 }, { "epoch": 0.4542062653995072, "grad_norm": 1.8480976819992065, "learning_rate": 8.37013259253435e-06, "loss": 0.7251, "step": 6452 }, { "epoch": 0.4542766631467793, "grad_norm": 1.5720670223236084, "learning_rate": 8.36856732411497e-06, "loss": 0.755, "step": 6453 }, { "epoch": 0.4543470608940514, "grad_norm": 1.4958299398422241, "learning_rate": 8.367001984542538e-06, "loss": 0.6995, "step": 6454 }, { "epoch": 0.4544174586413235, "grad_norm": 1.6497315168380737, "learning_rate": 8.365436573898435e-06, "loss": 0.6795, "step": 6455 }, { "epoch": 0.45448785638859557, "grad_norm": 1.772700309753418, "learning_rate": 8.363871092264047e-06, "loss": 0.7194, "step": 6456 }, { "epoch": 0.45455825413586765, "grad_norm": 1.74611234664917, "learning_rate": 8.362305539720764e-06, "loss": 0.6121, "step": 6457 }, { "epoch": 0.4546286518831397, "grad_norm": 1.7158839702606201, "learning_rate": 8.360739916349986e-06, "loss": 0.6236, "step": 6458 }, { "epoch": 0.4546990496304118, "grad_norm": 1.7607861757278442, "learning_rate": 8.359174222233105e-06, "loss": 0.6326, "step": 6459 }, { "epoch": 0.45476944737768393, "grad_norm": 2.091641902923584, "learning_rate": 8.357608457451525e-06, "loss": 0.6804, "step": 6460 }, { "epoch": 0.454839845124956, "grad_norm": 1.99857497215271, "learning_rate": 8.356042622086656e-06, "loss": 0.6771, "step": 6461 }, { "epoch": 0.4549102428722281, "grad_norm": 2.0967626571655273, "learning_rate": 8.354476716219897e-06, "loss": 0.6723, "step": 6462 }, { "epoch": 0.45498064061950017, "grad_norm": 1.8475315570831299, "learning_rate": 8.35291073993267e-06, "loss": 0.6287, "step": 6463 }, { "epoch": 0.45505103836677224, "grad_norm": 1.671162724494934, "learning_rate": 8.351344693306387e-06, "loss": 0.6557, "step": 6464 }, { "epoch": 0.4551214361140444, "grad_norm": 1.7373467683792114, "learning_rate": 8.34977857642247e-06, "loss": 0.7385, "step": 6465 }, { "epoch": 0.45519183386131645, "grad_norm": 1.8897998332977295, "learning_rate": 8.348212389362344e-06, "loss": 0.6001, "step": 6466 }, { "epoch": 0.45526223160858853, "grad_norm": 1.7477630376815796, "learning_rate": 8.346646132207433e-06, "loss": 0.7074, "step": 6467 }, { "epoch": 0.4553326293558606, "grad_norm": 1.9097415208816528, "learning_rate": 8.34507980503917e-06, "loss": 0.5947, "step": 6468 }, { "epoch": 0.4554030271031327, "grad_norm": 1.7260587215423584, "learning_rate": 8.34351340793899e-06, "loss": 0.6873, "step": 6469 }, { "epoch": 0.45547342485040476, "grad_norm": 1.7816095352172852, "learning_rate": 8.341946940988332e-06, "loss": 0.7284, "step": 6470 }, { "epoch": 0.4555438225976769, "grad_norm": 1.9860618114471436, "learning_rate": 8.340380404268637e-06, "loss": 0.7141, "step": 6471 }, { "epoch": 0.455614220344949, "grad_norm": 1.7601209878921509, "learning_rate": 8.338813797861349e-06, "loss": 0.7179, "step": 6472 }, { "epoch": 0.45568461809222105, "grad_norm": 1.7660984992980957, "learning_rate": 8.337247121847923e-06, "loss": 0.7, "step": 6473 }, { "epoch": 0.4557550158394931, "grad_norm": 1.6910138130187988, "learning_rate": 8.335680376309804e-06, "loss": 0.6627, "step": 6474 }, { "epoch": 0.4558254135867652, "grad_norm": 1.7947163581848145, "learning_rate": 8.334113561328454e-06, "loss": 0.6799, "step": 6475 }, { "epoch": 0.45589581133403734, "grad_norm": 1.8495246171951294, "learning_rate": 8.332546676985333e-06, "loss": 0.775, "step": 6476 }, { "epoch": 0.4559662090813094, "grad_norm": 1.547760248184204, "learning_rate": 8.330979723361901e-06, "loss": 0.7226, "step": 6477 }, { "epoch": 0.4560366068285815, "grad_norm": 1.907615065574646, "learning_rate": 8.329412700539629e-06, "loss": 0.7405, "step": 6478 }, { "epoch": 0.45610700457585357, "grad_norm": 1.9402505159378052, "learning_rate": 8.327845608599988e-06, "loss": 0.7197, "step": 6479 }, { "epoch": 0.45617740232312565, "grad_norm": 1.864632248878479, "learning_rate": 8.32627844762445e-06, "loss": 0.7657, "step": 6480 }, { "epoch": 0.4562478000703977, "grad_norm": 2.1043126583099365, "learning_rate": 8.324711217694493e-06, "loss": 0.6144, "step": 6481 }, { "epoch": 0.45631819781766986, "grad_norm": 1.8717507123947144, "learning_rate": 8.323143918891603e-06, "loss": 0.7642, "step": 6482 }, { "epoch": 0.45638859556494193, "grad_norm": 1.733496069908142, "learning_rate": 8.32157655129726e-06, "loss": 0.7064, "step": 6483 }, { "epoch": 0.456458993312214, "grad_norm": 1.6473946571350098, "learning_rate": 8.320009114992955e-06, "loss": 0.6854, "step": 6484 }, { "epoch": 0.4565293910594861, "grad_norm": 1.8370890617370605, "learning_rate": 8.318441610060185e-06, "loss": 0.7384, "step": 6485 }, { "epoch": 0.45659978880675817, "grad_norm": 1.649048089981079, "learning_rate": 8.316874036580436e-06, "loss": 0.7737, "step": 6486 }, { "epoch": 0.45667018655403024, "grad_norm": 5.046268463134766, "learning_rate": 8.315306394635216e-06, "loss": 0.6967, "step": 6487 }, { "epoch": 0.4567405843013024, "grad_norm": 1.8136941194534302, "learning_rate": 8.313738684306026e-06, "loss": 0.6926, "step": 6488 }, { "epoch": 0.45681098204857445, "grad_norm": 1.6416704654693604, "learning_rate": 8.312170905674369e-06, "loss": 0.633, "step": 6489 }, { "epoch": 0.45688137979584653, "grad_norm": 1.781707525253296, "learning_rate": 8.31060305882176e-06, "loss": 0.6994, "step": 6490 }, { "epoch": 0.4569517775431186, "grad_norm": 1.6545485258102417, "learning_rate": 8.309035143829712e-06, "loss": 0.6809, "step": 6491 }, { "epoch": 0.4570221752903907, "grad_norm": 2.1246144771575928, "learning_rate": 8.30746716077974e-06, "loss": 0.6907, "step": 6492 }, { "epoch": 0.4570925730376628, "grad_norm": 1.822157859802246, "learning_rate": 8.305899109753368e-06, "loss": 0.7472, "step": 6493 }, { "epoch": 0.4571629707849349, "grad_norm": 1.6629095077514648, "learning_rate": 8.304330990832116e-06, "loss": 0.6717, "step": 6494 }, { "epoch": 0.457233368532207, "grad_norm": 2.361196279525757, "learning_rate": 8.302762804097515e-06, "loss": 0.6202, "step": 6495 }, { "epoch": 0.45730376627947905, "grad_norm": 1.7035309076309204, "learning_rate": 8.301194549631099e-06, "loss": 0.6559, "step": 6496 }, { "epoch": 0.45737416402675113, "grad_norm": 1.996356725692749, "learning_rate": 8.299626227514397e-06, "loss": 0.6567, "step": 6497 }, { "epoch": 0.4574445617740232, "grad_norm": 1.8156561851501465, "learning_rate": 8.29805783782895e-06, "loss": 0.677, "step": 6498 }, { "epoch": 0.45751495952129534, "grad_norm": 1.7694058418273926, "learning_rate": 8.296489380656302e-06, "loss": 0.6172, "step": 6499 }, { "epoch": 0.4575853572685674, "grad_norm": 1.8041995763778687, "learning_rate": 8.294920856077998e-06, "loss": 0.8461, "step": 6500 }, { "epoch": 0.4576557550158395, "grad_norm": 1.9123862981796265, "learning_rate": 8.293352264175583e-06, "loss": 0.5966, "step": 6501 }, { "epoch": 0.45772615276311157, "grad_norm": 1.6095386743545532, "learning_rate": 8.291783605030614e-06, "loss": 0.7486, "step": 6502 }, { "epoch": 0.45779655051038365, "grad_norm": 2.0836477279663086, "learning_rate": 8.290214878724645e-06, "loss": 0.718, "step": 6503 }, { "epoch": 0.4578669482576558, "grad_norm": 1.7481831312179565, "learning_rate": 8.288646085339237e-06, "loss": 0.715, "step": 6504 }, { "epoch": 0.45793734600492786, "grad_norm": 2.1030187606811523, "learning_rate": 8.28707722495595e-06, "loss": 0.6823, "step": 6505 }, { "epoch": 0.45800774375219994, "grad_norm": 1.7742618322372437, "learning_rate": 8.285508297656355e-06, "loss": 0.6833, "step": 6506 }, { "epoch": 0.458078141499472, "grad_norm": 1.6606049537658691, "learning_rate": 8.283939303522017e-06, "loss": 0.6809, "step": 6507 }, { "epoch": 0.4581485392467441, "grad_norm": 1.6250191926956177, "learning_rate": 8.282370242634512e-06, "loss": 0.7054, "step": 6508 }, { "epoch": 0.45821893699401617, "grad_norm": 1.826456904411316, "learning_rate": 8.280801115075421e-06, "loss": 0.8157, "step": 6509 }, { "epoch": 0.4582893347412883, "grad_norm": 2.0603747367858887, "learning_rate": 8.279231920926316e-06, "loss": 0.7196, "step": 6510 }, { "epoch": 0.4583597324885604, "grad_norm": 1.7439708709716797, "learning_rate": 8.277662660268784e-06, "loss": 0.7661, "step": 6511 }, { "epoch": 0.45843013023583246, "grad_norm": 2.1767895221710205, "learning_rate": 8.276093333184416e-06, "loss": 0.6346, "step": 6512 }, { "epoch": 0.45850052798310453, "grad_norm": 2.667929172515869, "learning_rate": 8.274523939754798e-06, "loss": 0.7907, "step": 6513 }, { "epoch": 0.4585709257303766, "grad_norm": 2.6660072803497314, "learning_rate": 8.272954480061526e-06, "loss": 0.6984, "step": 6514 }, { "epoch": 0.4586413234776487, "grad_norm": 1.6459259986877441, "learning_rate": 8.2713849541862e-06, "loss": 0.6881, "step": 6515 }, { "epoch": 0.4587117212249208, "grad_norm": 1.809149146080017, "learning_rate": 8.269815362210417e-06, "loss": 0.6379, "step": 6516 }, { "epoch": 0.4587821189721929, "grad_norm": 1.9763376712799072, "learning_rate": 8.268245704215782e-06, "loss": 0.7481, "step": 6517 }, { "epoch": 0.458852516719465, "grad_norm": 1.8157546520233154, "learning_rate": 8.266675980283906e-06, "loss": 0.7958, "step": 6518 }, { "epoch": 0.45892291446673705, "grad_norm": 2.0433781147003174, "learning_rate": 8.2651061904964e-06, "loss": 0.6341, "step": 6519 }, { "epoch": 0.45899331221400913, "grad_norm": 1.9241418838500977, "learning_rate": 8.263536334934876e-06, "loss": 0.6372, "step": 6520 }, { "epoch": 0.45906370996128126, "grad_norm": 1.660119652748108, "learning_rate": 8.261966413680953e-06, "loss": 0.6578, "step": 6521 }, { "epoch": 0.45913410770855334, "grad_norm": 1.580331802368164, "learning_rate": 8.260396426816254e-06, "loss": 0.6844, "step": 6522 }, { "epoch": 0.4592045054558254, "grad_norm": 1.7798608541488647, "learning_rate": 8.2588263744224e-06, "loss": 0.6466, "step": 6523 }, { "epoch": 0.4592749032030975, "grad_norm": 2.0660674571990967, "learning_rate": 8.257256256581028e-06, "loss": 0.6268, "step": 6524 }, { "epoch": 0.4593453009503696, "grad_norm": 1.9902422428131104, "learning_rate": 8.255686073373763e-06, "loss": 0.6995, "step": 6525 }, { "epoch": 0.45941569869764165, "grad_norm": 1.6064355373382568, "learning_rate": 8.25411582488224e-06, "loss": 0.6101, "step": 6526 }, { "epoch": 0.4594860964449138, "grad_norm": 2.172921657562256, "learning_rate": 8.252545511188101e-06, "loss": 0.7372, "step": 6527 }, { "epoch": 0.45955649419218586, "grad_norm": 1.6394591331481934, "learning_rate": 8.250975132372985e-06, "loss": 0.6041, "step": 6528 }, { "epoch": 0.45962689193945794, "grad_norm": 1.8856136798858643, "learning_rate": 8.249404688518541e-06, "loss": 0.6772, "step": 6529 }, { "epoch": 0.45969728968673, "grad_norm": 1.6956474781036377, "learning_rate": 8.247834179706416e-06, "loss": 0.7632, "step": 6530 }, { "epoch": 0.4597676874340021, "grad_norm": 1.7499394416809082, "learning_rate": 8.24626360601826e-06, "loss": 0.6841, "step": 6531 }, { "epoch": 0.4598380851812742, "grad_norm": 1.6191076040267944, "learning_rate": 8.244692967535731e-06, "loss": 0.7055, "step": 6532 }, { "epoch": 0.4599084829285463, "grad_norm": 1.703787088394165, "learning_rate": 8.243122264340488e-06, "loss": 0.6539, "step": 6533 }, { "epoch": 0.4599788806758184, "grad_norm": 1.7421952486038208, "learning_rate": 8.241551496514192e-06, "loss": 0.7188, "step": 6534 }, { "epoch": 0.46004927842309046, "grad_norm": 1.825573444366455, "learning_rate": 8.239980664138508e-06, "loss": 0.5784, "step": 6535 }, { "epoch": 0.46011967617036253, "grad_norm": 2.005048990249634, "learning_rate": 8.238409767295109e-06, "loss": 0.7304, "step": 6536 }, { "epoch": 0.4601900739176346, "grad_norm": 1.6310120820999146, "learning_rate": 8.236838806065661e-06, "loss": 0.7717, "step": 6537 }, { "epoch": 0.46026047166490675, "grad_norm": 1.8763476610183716, "learning_rate": 8.235267780531845e-06, "loss": 0.6683, "step": 6538 }, { "epoch": 0.4603308694121788, "grad_norm": 1.8268344402313232, "learning_rate": 8.23369669077534e-06, "loss": 0.8291, "step": 6539 }, { "epoch": 0.4604012671594509, "grad_norm": 1.7157946825027466, "learning_rate": 8.232125536877826e-06, "loss": 0.5997, "step": 6540 }, { "epoch": 0.460471664906723, "grad_norm": 1.7959750890731812, "learning_rate": 8.23055431892099e-06, "loss": 0.7604, "step": 6541 }, { "epoch": 0.46054206265399505, "grad_norm": 1.571955919265747, "learning_rate": 8.228983036986519e-06, "loss": 0.7025, "step": 6542 }, { "epoch": 0.46061246040126713, "grad_norm": 2.3646934032440186, "learning_rate": 8.227411691156107e-06, "loss": 0.7397, "step": 6543 }, { "epoch": 0.46068285814853926, "grad_norm": 2.533334970474243, "learning_rate": 8.22584028151145e-06, "loss": 0.6445, "step": 6544 }, { "epoch": 0.46075325589581134, "grad_norm": 1.8560974597930908, "learning_rate": 8.224268808134248e-06, "loss": 0.753, "step": 6545 }, { "epoch": 0.4608236536430834, "grad_norm": 1.609652042388916, "learning_rate": 8.222697271106198e-06, "loss": 0.7598, "step": 6546 }, { "epoch": 0.4608940513903555, "grad_norm": 2.5884835720062256, "learning_rate": 8.221125670509014e-06, "loss": 0.7305, "step": 6547 }, { "epoch": 0.4609644491376276, "grad_norm": 1.9874389171600342, "learning_rate": 8.219554006424397e-06, "loss": 0.7265, "step": 6548 }, { "epoch": 0.4610348468848997, "grad_norm": 1.7227306365966797, "learning_rate": 8.217982278934064e-06, "loss": 0.5141, "step": 6549 }, { "epoch": 0.4611052446321718, "grad_norm": 1.3557060956954956, "learning_rate": 8.216410488119731e-06, "loss": 0.7381, "step": 6550 }, { "epoch": 0.46117564237944386, "grad_norm": 2.72552752494812, "learning_rate": 8.214838634063113e-06, "loss": 0.7391, "step": 6551 }, { "epoch": 0.46124604012671594, "grad_norm": 1.6267317533493042, "learning_rate": 8.213266716845935e-06, "loss": 0.5588, "step": 6552 }, { "epoch": 0.461316437873988, "grad_norm": 1.951943278312683, "learning_rate": 8.211694736549923e-06, "loss": 0.707, "step": 6553 }, { "epoch": 0.4613868356212601, "grad_norm": 2.00592303276062, "learning_rate": 8.2101226932568e-06, "loss": 0.7147, "step": 6554 }, { "epoch": 0.4614572333685322, "grad_norm": 1.9252140522003174, "learning_rate": 8.208550587048306e-06, "loss": 0.5928, "step": 6555 }, { "epoch": 0.4615276311158043, "grad_norm": 2.098562717437744, "learning_rate": 8.206978418006173e-06, "loss": 0.6783, "step": 6556 }, { "epoch": 0.4615980288630764, "grad_norm": 1.7309396266937256, "learning_rate": 8.205406186212137e-06, "loss": 0.6597, "step": 6557 }, { "epoch": 0.46166842661034846, "grad_norm": 1.729174017906189, "learning_rate": 8.203833891747941e-06, "loss": 0.6829, "step": 6558 }, { "epoch": 0.46173882435762054, "grad_norm": 2.060007095336914, "learning_rate": 8.202261534695331e-06, "loss": 0.7486, "step": 6559 }, { "epoch": 0.46180922210489267, "grad_norm": 1.8859670162200928, "learning_rate": 8.200689115136056e-06, "loss": 0.7116, "step": 6560 }, { "epoch": 0.46187961985216475, "grad_norm": 1.5308682918548584, "learning_rate": 8.199116633151866e-06, "loss": 0.5684, "step": 6561 }, { "epoch": 0.4619500175994368, "grad_norm": 1.9938805103302002, "learning_rate": 8.197544088824516e-06, "loss": 0.8211, "step": 6562 }, { "epoch": 0.4620204153467089, "grad_norm": 2.314110279083252, "learning_rate": 8.195971482235765e-06, "loss": 0.7142, "step": 6563 }, { "epoch": 0.462090813093981, "grad_norm": 1.6516205072402954, "learning_rate": 8.194398813467375e-06, "loss": 0.6756, "step": 6564 }, { "epoch": 0.46216121084125306, "grad_norm": 1.8000315427780151, "learning_rate": 8.192826082601105e-06, "loss": 0.6751, "step": 6565 }, { "epoch": 0.4622316085885252, "grad_norm": 1.874444603919983, "learning_rate": 8.191253289718731e-06, "loss": 0.786, "step": 6566 }, { "epoch": 0.46230200633579727, "grad_norm": 1.71653413772583, "learning_rate": 8.189680434902015e-06, "loss": 0.7403, "step": 6567 }, { "epoch": 0.46237240408306934, "grad_norm": 1.6900938749313354, "learning_rate": 8.18810751823274e-06, "loss": 0.7115, "step": 6568 }, { "epoch": 0.4624428018303414, "grad_norm": 1.6905124187469482, "learning_rate": 8.186534539792677e-06, "loss": 0.6126, "step": 6569 }, { "epoch": 0.4625131995776135, "grad_norm": 1.8259541988372803, "learning_rate": 8.184961499663608e-06, "loss": 0.6083, "step": 6570 }, { "epoch": 0.4625835973248856, "grad_norm": 2.0162506103515625, "learning_rate": 8.18338839792732e-06, "loss": 0.676, "step": 6571 }, { "epoch": 0.4626539950721577, "grad_norm": 1.8687485456466675, "learning_rate": 8.181815234665597e-06, "loss": 0.6406, "step": 6572 }, { "epoch": 0.4627243928194298, "grad_norm": 1.7021446228027344, "learning_rate": 8.180242009960226e-06, "loss": 0.7442, "step": 6573 }, { "epoch": 0.46279479056670186, "grad_norm": 1.7091535329818726, "learning_rate": 8.178668723893006e-06, "loss": 0.6899, "step": 6574 }, { "epoch": 0.46286518831397394, "grad_norm": 1.8356451988220215, "learning_rate": 8.177095376545733e-06, "loss": 0.5627, "step": 6575 }, { "epoch": 0.462935586061246, "grad_norm": 1.6926594972610474, "learning_rate": 8.175521968000207e-06, "loss": 0.5969, "step": 6576 }, { "epoch": 0.46300598380851815, "grad_norm": 2.181825876235962, "learning_rate": 8.173948498338224e-06, "loss": 0.7504, "step": 6577 }, { "epoch": 0.46307638155579023, "grad_norm": 1.7970621585845947, "learning_rate": 8.1723749676416e-06, "loss": 0.6694, "step": 6578 }, { "epoch": 0.4631467793030623, "grad_norm": 1.6287323236465454, "learning_rate": 8.170801375992138e-06, "loss": 0.6469, "step": 6579 }, { "epoch": 0.4632171770503344, "grad_norm": 2.1474478244781494, "learning_rate": 8.16922772347165e-06, "loss": 0.7079, "step": 6580 }, { "epoch": 0.46328757479760646, "grad_norm": 1.7716054916381836, "learning_rate": 8.167654010161957e-06, "loss": 0.7425, "step": 6581 }, { "epoch": 0.46335797254487854, "grad_norm": 2.1084840297698975, "learning_rate": 8.166080236144873e-06, "loss": 0.7847, "step": 6582 }, { "epoch": 0.46342837029215067, "grad_norm": 1.7810825109481812, "learning_rate": 8.16450640150222e-06, "loss": 0.8875, "step": 6583 }, { "epoch": 0.46349876803942275, "grad_norm": 2.373051404953003, "learning_rate": 8.162932506315828e-06, "loss": 0.7419, "step": 6584 }, { "epoch": 0.4635691657866948, "grad_norm": 1.538630723953247, "learning_rate": 8.16135855066752e-06, "loss": 0.7528, "step": 6585 }, { "epoch": 0.4636395635339669, "grad_norm": 1.7912665605545044, "learning_rate": 8.159784534639128e-06, "loss": 0.6331, "step": 6586 }, { "epoch": 0.463709961281239, "grad_norm": 1.8173837661743164, "learning_rate": 8.158210458312489e-06, "loss": 0.6998, "step": 6587 }, { "epoch": 0.4637803590285111, "grad_norm": 2.189375400543213, "learning_rate": 8.156636321769439e-06, "loss": 0.7337, "step": 6588 }, { "epoch": 0.4638507567757832, "grad_norm": 1.5301917791366577, "learning_rate": 8.15506212509182e-06, "loss": 0.5984, "step": 6589 }, { "epoch": 0.46392115452305527, "grad_norm": 1.8833513259887695, "learning_rate": 8.153487868361474e-06, "loss": 0.7232, "step": 6590 }, { "epoch": 0.46399155227032735, "grad_norm": 1.9034265279769897, "learning_rate": 8.151913551660247e-06, "loss": 0.7246, "step": 6591 }, { "epoch": 0.4640619500175994, "grad_norm": 1.7011418342590332, "learning_rate": 8.150339175069995e-06, "loss": 0.6213, "step": 6592 }, { "epoch": 0.4641323477648715, "grad_norm": 2.1070287227630615, "learning_rate": 8.148764738672567e-06, "loss": 0.712, "step": 6593 }, { "epoch": 0.46420274551214363, "grad_norm": 1.6628282070159912, "learning_rate": 8.147190242549817e-06, "loss": 0.6483, "step": 6594 }, { "epoch": 0.4642731432594157, "grad_norm": 2.0533814430236816, "learning_rate": 8.14561568678361e-06, "loss": 0.6931, "step": 6595 }, { "epoch": 0.4643435410066878, "grad_norm": 1.799839735031128, "learning_rate": 8.144041071455807e-06, "loss": 0.5362, "step": 6596 }, { "epoch": 0.46441393875395987, "grad_norm": 1.8561105728149414, "learning_rate": 8.142466396648269e-06, "loss": 0.7522, "step": 6597 }, { "epoch": 0.46448433650123194, "grad_norm": 1.7272330522537231, "learning_rate": 8.140891662442871e-06, "loss": 0.6589, "step": 6598 }, { "epoch": 0.4645547342485041, "grad_norm": 1.8416380882263184, "learning_rate": 8.13931686892148e-06, "loss": 0.6599, "step": 6599 }, { "epoch": 0.46462513199577615, "grad_norm": 1.8790628910064697, "learning_rate": 8.137742016165974e-06, "loss": 0.719, "step": 6600 }, { "epoch": 0.46469552974304823, "grad_norm": 1.7608462572097778, "learning_rate": 8.136167104258233e-06, "loss": 0.6545, "step": 6601 }, { "epoch": 0.4647659274903203, "grad_norm": 1.9088890552520752, "learning_rate": 8.134592133280133e-06, "loss": 0.6914, "step": 6602 }, { "epoch": 0.4648363252375924, "grad_norm": 1.8374568223953247, "learning_rate": 8.133017103313559e-06, "loss": 0.6451, "step": 6603 }, { "epoch": 0.46490672298486446, "grad_norm": 1.595694661140442, "learning_rate": 8.131442014440402e-06, "loss": 0.5943, "step": 6604 }, { "epoch": 0.4649771207321366, "grad_norm": 1.508575439453125, "learning_rate": 8.129866866742549e-06, "loss": 0.6643, "step": 6605 }, { "epoch": 0.4650475184794087, "grad_norm": 1.9670284986495972, "learning_rate": 8.128291660301895e-06, "loss": 0.7253, "step": 6606 }, { "epoch": 0.46511791622668075, "grad_norm": 5.888091564178467, "learning_rate": 8.126716395200335e-06, "loss": 0.6948, "step": 6607 }, { "epoch": 0.4651883139739528, "grad_norm": 1.9224740266799927, "learning_rate": 8.125141071519767e-06, "loss": 0.6226, "step": 6608 }, { "epoch": 0.4652587117212249, "grad_norm": 1.6967793703079224, "learning_rate": 8.1235656893421e-06, "loss": 0.7245, "step": 6609 }, { "epoch": 0.465329109468497, "grad_norm": 1.786620020866394, "learning_rate": 8.121990248749233e-06, "loss": 0.7128, "step": 6610 }, { "epoch": 0.4653995072157691, "grad_norm": 1.7419410943984985, "learning_rate": 8.120414749823077e-06, "loss": 0.6401, "step": 6611 }, { "epoch": 0.4654699049630412, "grad_norm": 1.5859366655349731, "learning_rate": 8.118839192645542e-06, "loss": 0.6937, "step": 6612 }, { "epoch": 0.46554030271031327, "grad_norm": 2.274855375289917, "learning_rate": 8.117263577298546e-06, "loss": 0.6581, "step": 6613 }, { "epoch": 0.46561070045758535, "grad_norm": 2.168102502822876, "learning_rate": 8.115687903864005e-06, "loss": 0.7059, "step": 6614 }, { "epoch": 0.4656810982048574, "grad_norm": 1.9641964435577393, "learning_rate": 8.11411217242384e-06, "loss": 0.7073, "step": 6615 }, { "epoch": 0.46575149595212956, "grad_norm": 1.7801446914672852, "learning_rate": 8.112536383059972e-06, "loss": 0.7763, "step": 6616 }, { "epoch": 0.46582189369940163, "grad_norm": 2.3308708667755127, "learning_rate": 8.110960535854331e-06, "loss": 0.7274, "step": 6617 }, { "epoch": 0.4658922914466737, "grad_norm": 1.8240830898284912, "learning_rate": 8.109384630888847e-06, "loss": 0.613, "step": 6618 }, { "epoch": 0.4659626891939458, "grad_norm": 1.6605192422866821, "learning_rate": 8.107808668245448e-06, "loss": 0.6586, "step": 6619 }, { "epoch": 0.46603308694121787, "grad_norm": 1.6053216457366943, "learning_rate": 8.106232648006076e-06, "loss": 0.6062, "step": 6620 }, { "epoch": 0.46610348468848994, "grad_norm": 1.4666861295700073, "learning_rate": 8.104656570252665e-06, "loss": 0.6005, "step": 6621 }, { "epoch": 0.4661738824357621, "grad_norm": 1.8091411590576172, "learning_rate": 8.10308043506716e-06, "loss": 0.6569, "step": 6622 }, { "epoch": 0.46624428018303415, "grad_norm": 2.287379264831543, "learning_rate": 8.101504242531502e-06, "loss": 0.7069, "step": 6623 }, { "epoch": 0.46631467793030623, "grad_norm": 1.5770342350006104, "learning_rate": 8.099927992727643e-06, "loss": 0.7911, "step": 6624 }, { "epoch": 0.4663850756775783, "grad_norm": 1.5946414470672607, "learning_rate": 8.09835168573753e-06, "loss": 0.6498, "step": 6625 }, { "epoch": 0.4664554734248504, "grad_norm": 1.9717284440994263, "learning_rate": 8.09677532164312e-06, "loss": 0.8084, "step": 6626 }, { "epoch": 0.4665258711721225, "grad_norm": 1.952942132949829, "learning_rate": 8.095198900526366e-06, "loss": 0.7824, "step": 6627 }, { "epoch": 0.4665962689193946, "grad_norm": 1.6234028339385986, "learning_rate": 8.093622422469228e-06, "loss": 0.7546, "step": 6628 }, { "epoch": 0.4666666666666667, "grad_norm": 1.7735446691513062, "learning_rate": 8.092045887553673e-06, "loss": 0.6229, "step": 6629 }, { "epoch": 0.46673706441393875, "grad_norm": 1.4549614191055298, "learning_rate": 8.090469295861661e-06, "loss": 0.6779, "step": 6630 }, { "epoch": 0.46680746216121083, "grad_norm": 1.821742057800293, "learning_rate": 8.088892647475164e-06, "loss": 0.7511, "step": 6631 }, { "epoch": 0.4668778599084829, "grad_norm": 2.043276786804199, "learning_rate": 8.087315942476151e-06, "loss": 0.6153, "step": 6632 }, { "epoch": 0.46694825765575504, "grad_norm": 1.8999780416488647, "learning_rate": 8.085739180946598e-06, "loss": 0.8009, "step": 6633 }, { "epoch": 0.4670186554030271, "grad_norm": 1.800286889076233, "learning_rate": 8.084162362968482e-06, "loss": 0.6204, "step": 6634 }, { "epoch": 0.4670890531502992, "grad_norm": 1.626577615737915, "learning_rate": 8.082585488623783e-06, "loss": 0.6875, "step": 6635 }, { "epoch": 0.46715945089757127, "grad_norm": 1.7355901002883911, "learning_rate": 8.081008557994485e-06, "loss": 0.638, "step": 6636 }, { "epoch": 0.46722984864484335, "grad_norm": 1.762460708618164, "learning_rate": 8.079431571162569e-06, "loss": 0.6176, "step": 6637 }, { "epoch": 0.4673002463921154, "grad_norm": 1.9596545696258545, "learning_rate": 8.077854528210032e-06, "loss": 0.6853, "step": 6638 }, { "epoch": 0.46737064413938756, "grad_norm": 1.9971206188201904, "learning_rate": 8.07627742921886e-06, "loss": 0.6898, "step": 6639 }, { "epoch": 0.46744104188665964, "grad_norm": 1.8275867700576782, "learning_rate": 8.074700274271051e-06, "loss": 0.7444, "step": 6640 }, { "epoch": 0.4675114396339317, "grad_norm": 1.7868245840072632, "learning_rate": 8.0731230634486e-06, "loss": 0.8214, "step": 6641 }, { "epoch": 0.4675818373812038, "grad_norm": 1.5321637392044067, "learning_rate": 8.071545796833511e-06, "loss": 0.7531, "step": 6642 }, { "epoch": 0.46765223512847587, "grad_norm": 1.5875775814056396, "learning_rate": 8.069968474507784e-06, "loss": 0.6505, "step": 6643 }, { "epoch": 0.467722632875748, "grad_norm": 1.5285100936889648, "learning_rate": 8.068391096553427e-06, "loss": 0.6346, "step": 6644 }, { "epoch": 0.4677930306230201, "grad_norm": 1.6502366065979004, "learning_rate": 8.06681366305245e-06, "loss": 0.6002, "step": 6645 }, { "epoch": 0.46786342837029216, "grad_norm": 1.8060230016708374, "learning_rate": 8.065236174086865e-06, "loss": 0.6169, "step": 6646 }, { "epoch": 0.46793382611756423, "grad_norm": 1.7972235679626465, "learning_rate": 8.063658629738687e-06, "loss": 0.7187, "step": 6647 }, { "epoch": 0.4680042238648363, "grad_norm": 1.5203578472137451, "learning_rate": 8.062081030089929e-06, "loss": 0.7106, "step": 6648 }, { "epoch": 0.4680746216121084, "grad_norm": 1.7217711210250854, "learning_rate": 8.06050337522262e-06, "loss": 0.695, "step": 6649 }, { "epoch": 0.4681450193593805, "grad_norm": 1.7866359949111938, "learning_rate": 8.05892566521878e-06, "loss": 0.7852, "step": 6650 }, { "epoch": 0.4682154171066526, "grad_norm": 1.9376652240753174, "learning_rate": 8.057347900160436e-06, "loss": 0.676, "step": 6651 }, { "epoch": 0.4682858148539247, "grad_norm": 1.4855091571807861, "learning_rate": 8.055770080129618e-06, "loss": 0.6453, "step": 6652 }, { "epoch": 0.46835621260119675, "grad_norm": 1.8638074398040771, "learning_rate": 8.054192205208356e-06, "loss": 0.7574, "step": 6653 }, { "epoch": 0.46842661034846883, "grad_norm": 1.7278921604156494, "learning_rate": 8.052614275478685e-06, "loss": 0.6543, "step": 6654 }, { "epoch": 0.46849700809574096, "grad_norm": 1.9031232595443726, "learning_rate": 8.051036291022646e-06, "loss": 0.6818, "step": 6655 }, { "epoch": 0.46856740584301304, "grad_norm": 1.8398131132125854, "learning_rate": 8.04945825192228e-06, "loss": 0.6614, "step": 6656 }, { "epoch": 0.4686378035902851, "grad_norm": 1.7867637872695923, "learning_rate": 8.047880158259624e-06, "loss": 0.6935, "step": 6657 }, { "epoch": 0.4687082013375572, "grad_norm": 1.6679308414459229, "learning_rate": 8.046302010116735e-06, "loss": 0.6961, "step": 6658 }, { "epoch": 0.4687785990848293, "grad_norm": 2.2838516235351562, "learning_rate": 8.044723807575654e-06, "loss": 0.6113, "step": 6659 }, { "epoch": 0.46884899683210135, "grad_norm": 3.0329344272613525, "learning_rate": 8.043145550718436e-06, "loss": 0.6943, "step": 6660 }, { "epoch": 0.4689193945793735, "grad_norm": 1.9068228006362915, "learning_rate": 8.04156723962714e-06, "loss": 0.7667, "step": 6661 }, { "epoch": 0.46898979232664556, "grad_norm": 1.9434071779251099, "learning_rate": 8.039988874383815e-06, "loss": 0.7173, "step": 6662 }, { "epoch": 0.46906019007391764, "grad_norm": 1.66643226146698, "learning_rate": 8.038410455070528e-06, "loss": 0.7289, "step": 6663 }, { "epoch": 0.4691305878211897, "grad_norm": 2.0360026359558105, "learning_rate": 8.036831981769342e-06, "loss": 0.7049, "step": 6664 }, { "epoch": 0.4692009855684618, "grad_norm": 1.9816287755966187, "learning_rate": 8.035253454562322e-06, "loss": 0.6698, "step": 6665 }, { "epoch": 0.46927138331573387, "grad_norm": 1.8286796808242798, "learning_rate": 8.033674873531537e-06, "loss": 0.622, "step": 6666 }, { "epoch": 0.469341781063006, "grad_norm": 1.9376773834228516, "learning_rate": 8.032096238759058e-06, "loss": 0.6757, "step": 6667 }, { "epoch": 0.4694121788102781, "grad_norm": 1.7383314371109009, "learning_rate": 8.030517550326964e-06, "loss": 0.6646, "step": 6668 }, { "epoch": 0.46948257655755016, "grad_norm": 1.5484790802001953, "learning_rate": 8.028938808317325e-06, "loss": 0.6853, "step": 6669 }, { "epoch": 0.46955297430482223, "grad_norm": 1.6185481548309326, "learning_rate": 8.027360012812228e-06, "loss": 0.7082, "step": 6670 }, { "epoch": 0.4696233720520943, "grad_norm": 1.741227388381958, "learning_rate": 8.025781163893753e-06, "loss": 0.7014, "step": 6671 }, { "epoch": 0.46969376979936645, "grad_norm": 1.3335667848587036, "learning_rate": 8.024202261643987e-06, "loss": 0.7158, "step": 6672 }, { "epoch": 0.4697641675466385, "grad_norm": 1.6549283266067505, "learning_rate": 8.022623306145017e-06, "loss": 0.6617, "step": 6673 }, { "epoch": 0.4698345652939106, "grad_norm": 1.5525285005569458, "learning_rate": 8.021044297478935e-06, "loss": 0.6158, "step": 6674 }, { "epoch": 0.4699049630411827, "grad_norm": 1.9705122709274292, "learning_rate": 8.019465235727837e-06, "loss": 0.7056, "step": 6675 }, { "epoch": 0.46997536078845475, "grad_norm": 2.035418748855591, "learning_rate": 8.017886120973816e-06, "loss": 0.6909, "step": 6676 }, { "epoch": 0.47004575853572683, "grad_norm": 1.605615258216858, "learning_rate": 8.016306953298976e-06, "loss": 0.672, "step": 6677 }, { "epoch": 0.47011615628299896, "grad_norm": 1.9544897079467773, "learning_rate": 8.014727732785415e-06, "loss": 0.8187, "step": 6678 }, { "epoch": 0.47018655403027104, "grad_norm": 1.8247848749160767, "learning_rate": 8.013148459515241e-06, "loss": 0.7206, "step": 6679 }, { "epoch": 0.4702569517775431, "grad_norm": 1.7818752527236938, "learning_rate": 8.011569133570562e-06, "loss": 0.7857, "step": 6680 }, { "epoch": 0.4703273495248152, "grad_norm": 1.9644584655761719, "learning_rate": 8.009989755033485e-06, "loss": 0.6586, "step": 6681 }, { "epoch": 0.4703977472720873, "grad_norm": 2.0757572650909424, "learning_rate": 8.008410323986128e-06, "loss": 0.7885, "step": 6682 }, { "epoch": 0.4704681450193594, "grad_norm": 1.6330646276474, "learning_rate": 8.006830840510605e-06, "loss": 0.6482, "step": 6683 }, { "epoch": 0.4705385427666315, "grad_norm": 1.9295967817306519, "learning_rate": 8.005251304689034e-06, "loss": 0.6695, "step": 6684 }, { "epoch": 0.47060894051390356, "grad_norm": 2.068580150604248, "learning_rate": 8.003671716603538e-06, "loss": 0.743, "step": 6685 }, { "epoch": 0.47067933826117564, "grad_norm": 2.1411521434783936, "learning_rate": 8.00209207633624e-06, "loss": 0.7561, "step": 6686 }, { "epoch": 0.4707497360084477, "grad_norm": 1.6866395473480225, "learning_rate": 8.000512383969266e-06, "loss": 0.7009, "step": 6687 }, { "epoch": 0.4708201337557198, "grad_norm": 2.305881977081299, "learning_rate": 7.99893263958475e-06, "loss": 0.7401, "step": 6688 }, { "epoch": 0.4708905315029919, "grad_norm": 1.8394367694854736, "learning_rate": 7.997352843264817e-06, "loss": 0.7403, "step": 6689 }, { "epoch": 0.470960929250264, "grad_norm": 1.721963882446289, "learning_rate": 7.99577299509161e-06, "loss": 0.6351, "step": 6690 }, { "epoch": 0.4710313269975361, "grad_norm": 1.8732179403305054, "learning_rate": 7.994193095147263e-06, "loss": 0.7327, "step": 6691 }, { "epoch": 0.47110172474480816, "grad_norm": 1.6406373977661133, "learning_rate": 7.992613143513915e-06, "loss": 0.6805, "step": 6692 }, { "epoch": 0.47117212249208024, "grad_norm": 1.5529100894927979, "learning_rate": 7.99103314027371e-06, "loss": 0.7675, "step": 6693 }, { "epoch": 0.4712425202393523, "grad_norm": 2.229367971420288, "learning_rate": 7.989453085508798e-06, "loss": 0.6014, "step": 6694 }, { "epoch": 0.47131291798662445, "grad_norm": 2.185915231704712, "learning_rate": 7.987872979301323e-06, "loss": 0.647, "step": 6695 }, { "epoch": 0.4713833157338965, "grad_norm": 1.5875635147094727, "learning_rate": 7.986292821733435e-06, "loss": 0.6269, "step": 6696 }, { "epoch": 0.4714537134811686, "grad_norm": 1.5620105266571045, "learning_rate": 7.984712612887292e-06, "loss": 0.6604, "step": 6697 }, { "epoch": 0.4715241112284407, "grad_norm": 1.664412021636963, "learning_rate": 7.983132352845048e-06, "loss": 0.7471, "step": 6698 }, { "epoch": 0.47159450897571276, "grad_norm": 1.703757643699646, "learning_rate": 7.981552041688861e-06, "loss": 0.7221, "step": 6699 }, { "epoch": 0.4716649067229849, "grad_norm": 1.6820560693740845, "learning_rate": 7.979971679500896e-06, "loss": 0.6342, "step": 6700 }, { "epoch": 0.47173530447025697, "grad_norm": 1.5384210348129272, "learning_rate": 7.978391266363317e-06, "loss": 0.6757, "step": 6701 }, { "epoch": 0.47180570221752904, "grad_norm": 1.7945889234542847, "learning_rate": 7.976810802358287e-06, "loss": 0.7233, "step": 6702 }, { "epoch": 0.4718760999648011, "grad_norm": 2.419027328491211, "learning_rate": 7.975230287567982e-06, "loss": 0.6333, "step": 6703 }, { "epoch": 0.4719464977120732, "grad_norm": 2.267399549484253, "learning_rate": 7.973649722074568e-06, "loss": 0.7891, "step": 6704 }, { "epoch": 0.4720168954593453, "grad_norm": 2.125166654586792, "learning_rate": 7.972069105960225e-06, "loss": 0.7451, "step": 6705 }, { "epoch": 0.4720872932066174, "grad_norm": 1.4931856393814087, "learning_rate": 7.97048843930713e-06, "loss": 0.7722, "step": 6706 }, { "epoch": 0.4721576909538895, "grad_norm": 1.4914391040802002, "learning_rate": 7.968907722197459e-06, "loss": 0.7173, "step": 6707 }, { "epoch": 0.47222808870116156, "grad_norm": 1.75771963596344, "learning_rate": 7.967326954713398e-06, "loss": 0.6952, "step": 6708 }, { "epoch": 0.47229848644843364, "grad_norm": 1.6117488145828247, "learning_rate": 7.965746136937136e-06, "loss": 0.7264, "step": 6709 }, { "epoch": 0.4723688841957057, "grad_norm": 1.7845412492752075, "learning_rate": 7.964165268950855e-06, "loss": 0.7334, "step": 6710 }, { "epoch": 0.47243928194297785, "grad_norm": 1.6614781618118286, "learning_rate": 7.962584350836749e-06, "loss": 0.6764, "step": 6711 }, { "epoch": 0.47250967969024993, "grad_norm": 2.0798163414001465, "learning_rate": 7.961003382677013e-06, "loss": 0.654, "step": 6712 }, { "epoch": 0.472580077437522, "grad_norm": 2.6189446449279785, "learning_rate": 7.959422364553838e-06, "loss": 0.6912, "step": 6713 }, { "epoch": 0.4726504751847941, "grad_norm": 1.920992136001587, "learning_rate": 7.957841296549426e-06, "loss": 0.6182, "step": 6714 }, { "epoch": 0.47272087293206616, "grad_norm": 1.5950154066085815, "learning_rate": 7.956260178745977e-06, "loss": 0.6952, "step": 6715 }, { "epoch": 0.47279127067933824, "grad_norm": 1.7773045301437378, "learning_rate": 7.954679011225697e-06, "loss": 0.7067, "step": 6716 }, { "epoch": 0.47286166842661037, "grad_norm": 2.189542770385742, "learning_rate": 7.953097794070788e-06, "loss": 0.745, "step": 6717 }, { "epoch": 0.47293206617388245, "grad_norm": 1.784259557723999, "learning_rate": 7.951516527363462e-06, "loss": 0.7161, "step": 6718 }, { "epoch": 0.4730024639211545, "grad_norm": 2.1091091632843018, "learning_rate": 7.949935211185935e-06, "loss": 0.6174, "step": 6719 }, { "epoch": 0.4730728616684266, "grad_norm": 1.9375001192092896, "learning_rate": 7.948353845620411e-06, "loss": 0.7722, "step": 6720 }, { "epoch": 0.4731432594156987, "grad_norm": 1.7820405960083008, "learning_rate": 7.94677243074911e-06, "loss": 0.6173, "step": 6721 }, { "epoch": 0.47321365716297076, "grad_norm": 1.8287259340286255, "learning_rate": 7.945190966654258e-06, "loss": 0.6496, "step": 6722 }, { "epoch": 0.4732840549102429, "grad_norm": 1.7746152877807617, "learning_rate": 7.943609453418069e-06, "loss": 0.73, "step": 6723 }, { "epoch": 0.47335445265751497, "grad_norm": 1.6024863719940186, "learning_rate": 7.942027891122769e-06, "loss": 0.6477, "step": 6724 }, { "epoch": 0.47342485040478705, "grad_norm": 1.6074565649032593, "learning_rate": 7.940446279850589e-06, "loss": 0.7123, "step": 6725 }, { "epoch": 0.4734952481520591, "grad_norm": 1.7225322723388672, "learning_rate": 7.938864619683754e-06, "loss": 0.669, "step": 6726 }, { "epoch": 0.4735656458993312, "grad_norm": 1.755051851272583, "learning_rate": 7.937282910704495e-06, "loss": 0.6147, "step": 6727 }, { "epoch": 0.47363604364660333, "grad_norm": 1.7224875688552856, "learning_rate": 7.935701152995052e-06, "loss": 0.7224, "step": 6728 }, { "epoch": 0.4737064413938754, "grad_norm": 1.8651607036590576, "learning_rate": 7.934119346637655e-06, "loss": 0.6724, "step": 6729 }, { "epoch": 0.4737768391411475, "grad_norm": 2.0180118083953857, "learning_rate": 7.932537491714549e-06, "loss": 0.7529, "step": 6730 }, { "epoch": 0.47384723688841957, "grad_norm": 1.818206787109375, "learning_rate": 7.930955588307975e-06, "loss": 0.726, "step": 6731 }, { "epoch": 0.47391763463569164, "grad_norm": 1.8935813903808594, "learning_rate": 7.929373636500174e-06, "loss": 0.6805, "step": 6732 }, { "epoch": 0.4739880323829637, "grad_norm": 1.8435977697372437, "learning_rate": 7.927791636373398e-06, "loss": 0.7083, "step": 6733 }, { "epoch": 0.47405843013023585, "grad_norm": 1.6589529514312744, "learning_rate": 7.926209588009896e-06, "loss": 0.64, "step": 6734 }, { "epoch": 0.47412882787750793, "grad_norm": 2.126020908355713, "learning_rate": 7.924627491491917e-06, "loss": 0.6489, "step": 6735 }, { "epoch": 0.47419922562478, "grad_norm": 2.018960952758789, "learning_rate": 7.923045346901717e-06, "loss": 0.6663, "step": 6736 }, { "epoch": 0.4742696233720521, "grad_norm": 1.7846347093582153, "learning_rate": 7.921463154321553e-06, "loss": 0.6368, "step": 6737 }, { "epoch": 0.47434002111932416, "grad_norm": 1.669547438621521, "learning_rate": 7.919880913833686e-06, "loss": 0.7901, "step": 6738 }, { "epoch": 0.4744104188665963, "grad_norm": 1.613043189048767, "learning_rate": 7.918298625520379e-06, "loss": 0.6952, "step": 6739 }, { "epoch": 0.4744808166138684, "grad_norm": 2.6151413917541504, "learning_rate": 7.916716289463891e-06, "loss": 0.8331, "step": 6740 }, { "epoch": 0.47455121436114045, "grad_norm": 1.2126036882400513, "learning_rate": 7.915133905746495e-06, "loss": 0.7747, "step": 6741 }, { "epoch": 0.4746216121084125, "grad_norm": 1.7755547761917114, "learning_rate": 7.91355147445046e-06, "loss": 0.736, "step": 6742 }, { "epoch": 0.4746920098556846, "grad_norm": 1.616078495979309, "learning_rate": 7.911968995658053e-06, "loss": 0.747, "step": 6743 }, { "epoch": 0.4747624076029567, "grad_norm": 1.99058997631073, "learning_rate": 7.910386469451553e-06, "loss": 0.6266, "step": 6744 }, { "epoch": 0.4748328053502288, "grad_norm": 1.845469355583191, "learning_rate": 7.90880389591324e-06, "loss": 0.623, "step": 6745 }, { "epoch": 0.4749032030975009, "grad_norm": 1.6575201749801636, "learning_rate": 7.907221275125385e-06, "loss": 0.688, "step": 6746 }, { "epoch": 0.47497360084477297, "grad_norm": 1.8003562688827515, "learning_rate": 7.905638607170274e-06, "loss": 0.6812, "step": 6747 }, { "epoch": 0.47504399859204505, "grad_norm": 1.8818820714950562, "learning_rate": 7.904055892130196e-06, "loss": 0.7288, "step": 6748 }, { "epoch": 0.4751143963393171, "grad_norm": 1.7230887413024902, "learning_rate": 7.902473130087429e-06, "loss": 0.7646, "step": 6749 }, { "epoch": 0.4751847940865892, "grad_norm": 1.739432692527771, "learning_rate": 7.900890321124266e-06, "loss": 0.7681, "step": 6750 }, { "epoch": 0.47525519183386133, "grad_norm": 1.7356570959091187, "learning_rate": 7.899307465323004e-06, "loss": 0.6426, "step": 6751 }, { "epoch": 0.4753255895811334, "grad_norm": 1.8385905027389526, "learning_rate": 7.897724562765928e-06, "loss": 0.7409, "step": 6752 }, { "epoch": 0.4753959873284055, "grad_norm": 2.226341724395752, "learning_rate": 7.896141613535342e-06, "loss": 0.6909, "step": 6753 }, { "epoch": 0.47546638507567757, "grad_norm": 1.5906906127929688, "learning_rate": 7.894558617713541e-06, "loss": 0.6834, "step": 6754 }, { "epoch": 0.47553678282294964, "grad_norm": 1.6431958675384521, "learning_rate": 7.892975575382827e-06, "loss": 0.608, "step": 6755 }, { "epoch": 0.4756071805702218, "grad_norm": 1.5227729082107544, "learning_rate": 7.891392486625504e-06, "loss": 0.625, "step": 6756 }, { "epoch": 0.47567757831749385, "grad_norm": 1.677726149559021, "learning_rate": 7.88980935152388e-06, "loss": 0.7254, "step": 6757 }, { "epoch": 0.47574797606476593, "grad_norm": 1.5039305686950684, "learning_rate": 7.888226170160259e-06, "loss": 0.6764, "step": 6758 }, { "epoch": 0.475818373812038, "grad_norm": 1.617849588394165, "learning_rate": 7.886642942616956e-06, "loss": 0.774, "step": 6759 }, { "epoch": 0.4758887715593101, "grad_norm": 1.7383544445037842, "learning_rate": 7.885059668976284e-06, "loss": 0.6313, "step": 6760 }, { "epoch": 0.47595916930658216, "grad_norm": 1.7849175930023193, "learning_rate": 7.883476349320558e-06, "loss": 0.7509, "step": 6761 }, { "epoch": 0.4760295670538543, "grad_norm": 1.8035820722579956, "learning_rate": 7.881892983732096e-06, "loss": 0.7083, "step": 6762 }, { "epoch": 0.4760999648011264, "grad_norm": 2.0053060054779053, "learning_rate": 7.880309572293221e-06, "loss": 0.6441, "step": 6763 }, { "epoch": 0.47617036254839845, "grad_norm": 1.58551025390625, "learning_rate": 7.878726115086252e-06, "loss": 0.7434, "step": 6764 }, { "epoch": 0.47624076029567053, "grad_norm": 1.7277882099151611, "learning_rate": 7.877142612193516e-06, "loss": 0.7577, "step": 6765 }, { "epoch": 0.4763111580429426, "grad_norm": 1.681213617324829, "learning_rate": 7.875559063697344e-06, "loss": 0.7618, "step": 6766 }, { "epoch": 0.47638155579021474, "grad_norm": 2.30100417137146, "learning_rate": 7.873975469680062e-06, "loss": 0.6727, "step": 6767 }, { "epoch": 0.4764519535374868, "grad_norm": 1.6710983514785767, "learning_rate": 7.872391830224003e-06, "loss": 0.7465, "step": 6768 }, { "epoch": 0.4765223512847589, "grad_norm": 1.8669520616531372, "learning_rate": 7.870808145411504e-06, "loss": 0.7627, "step": 6769 }, { "epoch": 0.47659274903203097, "grad_norm": 1.7266384363174438, "learning_rate": 7.869224415324899e-06, "loss": 0.6672, "step": 6770 }, { "epoch": 0.47666314677930305, "grad_norm": 1.5524400472640991, "learning_rate": 7.86764064004653e-06, "loss": 0.6605, "step": 6771 }, { "epoch": 0.4767335445265751, "grad_norm": 1.888788104057312, "learning_rate": 7.866056819658738e-06, "loss": 0.6984, "step": 6772 }, { "epoch": 0.47680394227384726, "grad_norm": 2.3503150939941406, "learning_rate": 7.86447295424387e-06, "loss": 0.7736, "step": 6773 }, { "epoch": 0.47687434002111934, "grad_norm": 1.8432834148406982, "learning_rate": 7.86288904388427e-06, "loss": 0.7291, "step": 6774 }, { "epoch": 0.4769447377683914, "grad_norm": 1.5287115573883057, "learning_rate": 7.861305088662283e-06, "loss": 0.8011, "step": 6775 }, { "epoch": 0.4770151355156635, "grad_norm": 1.5108696222305298, "learning_rate": 7.85972108866027e-06, "loss": 0.662, "step": 6776 }, { "epoch": 0.47708553326293557, "grad_norm": 1.983817219734192, "learning_rate": 7.858137043960574e-06, "loss": 0.7181, "step": 6777 }, { "epoch": 0.47715593101020765, "grad_norm": 2.0147266387939453, "learning_rate": 7.856552954645558e-06, "loss": 0.6873, "step": 6778 }, { "epoch": 0.4772263287574798, "grad_norm": 1.8152447938919067, "learning_rate": 7.85496882079758e-06, "loss": 0.6704, "step": 6779 }, { "epoch": 0.47729672650475186, "grad_norm": 1.962342619895935, "learning_rate": 7.853384642498996e-06, "loss": 0.7447, "step": 6780 }, { "epoch": 0.47736712425202393, "grad_norm": 1.7407258749008179, "learning_rate": 7.851800419832172e-06, "loss": 0.7036, "step": 6781 }, { "epoch": 0.477437521999296, "grad_norm": 1.9676868915557861, "learning_rate": 7.850216152879474e-06, "loss": 0.7059, "step": 6782 }, { "epoch": 0.4775079197465681, "grad_norm": 1.8255327939987183, "learning_rate": 7.848631841723267e-06, "loss": 0.6588, "step": 6783 }, { "epoch": 0.4775783174938402, "grad_norm": 1.7491040229797363, "learning_rate": 7.847047486445922e-06, "loss": 0.6114, "step": 6784 }, { "epoch": 0.4776487152411123, "grad_norm": 2.599726915359497, "learning_rate": 7.845463087129812e-06, "loss": 0.7537, "step": 6785 }, { "epoch": 0.4777191129883844, "grad_norm": 1.7656313180923462, "learning_rate": 7.84387864385731e-06, "loss": 0.6491, "step": 6786 }, { "epoch": 0.47778951073565645, "grad_norm": 1.9474107027053833, "learning_rate": 7.842294156710792e-06, "loss": 0.6144, "step": 6787 }, { "epoch": 0.47785990848292853, "grad_norm": 1.653998851776123, "learning_rate": 7.840709625772641e-06, "loss": 0.7486, "step": 6788 }, { "epoch": 0.4779303062302006, "grad_norm": 1.4568196535110474, "learning_rate": 7.83912505112523e-06, "loss": 0.8302, "step": 6789 }, { "epoch": 0.47800070397747274, "grad_norm": 2.057088613510132, "learning_rate": 7.837540432850953e-06, "loss": 0.751, "step": 6790 }, { "epoch": 0.4780711017247448, "grad_norm": 1.8542979955673218, "learning_rate": 7.835955771032187e-06, "loss": 0.7578, "step": 6791 }, { "epoch": 0.4781414994720169, "grad_norm": 1.98606276512146, "learning_rate": 7.834371065751324e-06, "loss": 0.7466, "step": 6792 }, { "epoch": 0.478211897219289, "grad_norm": 1.7949435710906982, "learning_rate": 7.832786317090754e-06, "loss": 0.8025, "step": 6793 }, { "epoch": 0.47828229496656105, "grad_norm": 1.7889776229858398, "learning_rate": 7.831201525132868e-06, "loss": 0.6185, "step": 6794 }, { "epoch": 0.4783526927138332, "grad_norm": 2.0125715732574463, "learning_rate": 7.829616689960063e-06, "loss": 0.6421, "step": 6795 }, { "epoch": 0.47842309046110526, "grad_norm": 1.8744618892669678, "learning_rate": 7.828031811654738e-06, "loss": 0.8033, "step": 6796 }, { "epoch": 0.47849348820837734, "grad_norm": 1.6122593879699707, "learning_rate": 7.826446890299284e-06, "loss": 0.6939, "step": 6797 }, { "epoch": 0.4785638859556494, "grad_norm": 1.66451895236969, "learning_rate": 7.82486192597611e-06, "loss": 0.7637, "step": 6798 }, { "epoch": 0.4786342837029215, "grad_norm": 1.9264154434204102, "learning_rate": 7.823276918767618e-06, "loss": 0.6622, "step": 6799 }, { "epoch": 0.47870468145019357, "grad_norm": 1.8041987419128418, "learning_rate": 7.821691868756214e-06, "loss": 0.6672, "step": 6800 }, { "epoch": 0.4787750791974657, "grad_norm": 1.7057160139083862, "learning_rate": 7.820106776024303e-06, "loss": 0.6944, "step": 6801 }, { "epoch": 0.4788454769447378, "grad_norm": 2.0275118350982666, "learning_rate": 7.8185216406543e-06, "loss": 0.7639, "step": 6802 }, { "epoch": 0.47891587469200986, "grad_norm": 1.6468479633331299, "learning_rate": 7.816936462728613e-06, "loss": 0.643, "step": 6803 }, { "epoch": 0.47898627243928193, "grad_norm": 1.7893774509429932, "learning_rate": 7.815351242329662e-06, "loss": 0.7106, "step": 6804 }, { "epoch": 0.479056670186554, "grad_norm": 1.7952438592910767, "learning_rate": 7.813765979539863e-06, "loss": 0.6891, "step": 6805 }, { "epoch": 0.47912706793382615, "grad_norm": 2.086951494216919, "learning_rate": 7.81218067444163e-06, "loss": 0.688, "step": 6806 }, { "epoch": 0.4791974656810982, "grad_norm": 1.694872498512268, "learning_rate": 7.81059532711739e-06, "loss": 0.6482, "step": 6807 }, { "epoch": 0.4792678634283703, "grad_norm": 1.5171092748641968, "learning_rate": 7.809009937649566e-06, "loss": 0.7119, "step": 6808 }, { "epoch": 0.4793382611756424, "grad_norm": 1.6777969598770142, "learning_rate": 7.80742450612058e-06, "loss": 0.7743, "step": 6809 }, { "epoch": 0.47940865892291445, "grad_norm": 1.7188845872879028, "learning_rate": 7.805839032612863e-06, "loss": 0.6551, "step": 6810 }, { "epoch": 0.47947905667018653, "grad_norm": 2.0222718715667725, "learning_rate": 7.804253517208848e-06, "loss": 0.6913, "step": 6811 }, { "epoch": 0.47954945441745866, "grad_norm": 1.732547402381897, "learning_rate": 7.802667959990961e-06, "loss": 0.7371, "step": 6812 }, { "epoch": 0.47961985216473074, "grad_norm": 1.7483415603637695, "learning_rate": 7.80108236104164e-06, "loss": 0.7217, "step": 6813 }, { "epoch": 0.4796902499120028, "grad_norm": 1.6961084604263306, "learning_rate": 7.799496720443326e-06, "loss": 0.6989, "step": 6814 }, { "epoch": 0.4797606476592749, "grad_norm": 1.798318862915039, "learning_rate": 7.797911038278448e-06, "loss": 0.5606, "step": 6815 }, { "epoch": 0.479831045406547, "grad_norm": 1.5289281606674194, "learning_rate": 7.796325314629453e-06, "loss": 0.7196, "step": 6816 }, { "epoch": 0.47990144315381905, "grad_norm": 3.0809807777404785, "learning_rate": 7.794739549578786e-06, "loss": 0.7038, "step": 6817 }, { "epoch": 0.4799718409010912, "grad_norm": 1.6639717817306519, "learning_rate": 7.79315374320889e-06, "loss": 0.7085, "step": 6818 }, { "epoch": 0.48004223864836326, "grad_norm": 1.9146907329559326, "learning_rate": 7.791567895602211e-06, "loss": 0.5771, "step": 6819 }, { "epoch": 0.48011263639563534, "grad_norm": 1.652601957321167, "learning_rate": 7.789982006841203e-06, "loss": 0.7023, "step": 6820 }, { "epoch": 0.4801830341429074, "grad_norm": 1.6744959354400635, "learning_rate": 7.788396077008311e-06, "loss": 0.6755, "step": 6821 }, { "epoch": 0.4802534318901795, "grad_norm": 1.8997304439544678, "learning_rate": 7.786810106185995e-06, "loss": 0.6364, "step": 6822 }, { "epoch": 0.4803238296374516, "grad_norm": 1.894555926322937, "learning_rate": 7.78522409445671e-06, "loss": 0.7505, "step": 6823 }, { "epoch": 0.4803942273847237, "grad_norm": 1.6558303833007812, "learning_rate": 7.783638041902911e-06, "loss": 0.8226, "step": 6824 }, { "epoch": 0.4804646251319958, "grad_norm": 1.7750391960144043, "learning_rate": 7.78205194860706e-06, "loss": 0.6479, "step": 6825 }, { "epoch": 0.48053502287926786, "grad_norm": 1.760072946548462, "learning_rate": 7.780465814651622e-06, "loss": 0.7342, "step": 6826 }, { "epoch": 0.48060542062653994, "grad_norm": 1.8609955310821533, "learning_rate": 7.778879640119062e-06, "loss": 0.7848, "step": 6827 }, { "epoch": 0.480675818373812, "grad_norm": 1.8693819046020508, "learning_rate": 7.777293425091838e-06, "loss": 0.8269, "step": 6828 }, { "epoch": 0.48074621612108415, "grad_norm": 1.8851139545440674, "learning_rate": 7.775707169652429e-06, "loss": 0.7324, "step": 6829 }, { "epoch": 0.4808166138683562, "grad_norm": 1.7135026454925537, "learning_rate": 7.7741208738833e-06, "loss": 0.6112, "step": 6830 }, { "epoch": 0.4808870116156283, "grad_norm": 1.630720615386963, "learning_rate": 7.772534537866926e-06, "loss": 0.6855, "step": 6831 }, { "epoch": 0.4809574093629004, "grad_norm": 1.844660997390747, "learning_rate": 7.770948161685783e-06, "loss": 0.6802, "step": 6832 }, { "epoch": 0.48102780711017246, "grad_norm": 1.9692963361740112, "learning_rate": 7.769361745422347e-06, "loss": 0.7687, "step": 6833 }, { "epoch": 0.4810982048574446, "grad_norm": 1.6308640241622925, "learning_rate": 7.767775289159095e-06, "loss": 0.6785, "step": 6834 }, { "epoch": 0.48116860260471667, "grad_norm": 1.6492557525634766, "learning_rate": 7.76618879297851e-06, "loss": 0.6873, "step": 6835 }, { "epoch": 0.48123900035198874, "grad_norm": 1.8350876569747925, "learning_rate": 7.76460225696308e-06, "loss": 0.7817, "step": 6836 }, { "epoch": 0.4813093980992608, "grad_norm": 1.6560730934143066, "learning_rate": 7.763015681195283e-06, "loss": 0.7497, "step": 6837 }, { "epoch": 0.4813797958465329, "grad_norm": 1.9248439073562622, "learning_rate": 7.761429065757608e-06, "loss": 0.677, "step": 6838 }, { "epoch": 0.481450193593805, "grad_norm": 2.6765871047973633, "learning_rate": 7.759842410732549e-06, "loss": 0.9269, "step": 6839 }, { "epoch": 0.4815205913410771, "grad_norm": 2.0128135681152344, "learning_rate": 7.758255716202593e-06, "loss": 0.7108, "step": 6840 }, { "epoch": 0.4815909890883492, "grad_norm": 2.12467098236084, "learning_rate": 7.756668982250238e-06, "loss": 0.7843, "step": 6841 }, { "epoch": 0.48166138683562126, "grad_norm": 1.8050010204315186, "learning_rate": 7.755082208957975e-06, "loss": 0.686, "step": 6842 }, { "epoch": 0.48173178458289334, "grad_norm": 1.8978787660598755, "learning_rate": 7.753495396408307e-06, "loss": 0.7196, "step": 6843 }, { "epoch": 0.4818021823301654, "grad_norm": 2.349475383758545, "learning_rate": 7.75190854468373e-06, "loss": 0.7066, "step": 6844 }, { "epoch": 0.4818725800774375, "grad_norm": 1.5959168672561646, "learning_rate": 7.750321653866745e-06, "loss": 0.7257, "step": 6845 }, { "epoch": 0.48194297782470963, "grad_norm": 2.206106662750244, "learning_rate": 7.748734724039857e-06, "loss": 0.6676, "step": 6846 }, { "epoch": 0.4820133755719817, "grad_norm": 1.589002251625061, "learning_rate": 7.747147755285577e-06, "loss": 0.7494, "step": 6847 }, { "epoch": 0.4820837733192538, "grad_norm": 1.5332880020141602, "learning_rate": 7.745560747686406e-06, "loss": 0.6531, "step": 6848 }, { "epoch": 0.48215417106652586, "grad_norm": 1.626341462135315, "learning_rate": 7.743973701324854e-06, "loss": 0.6427, "step": 6849 }, { "epoch": 0.48222456881379794, "grad_norm": 1.674383282661438, "learning_rate": 7.74238661628344e-06, "loss": 0.6109, "step": 6850 }, { "epoch": 0.48229496656107007, "grad_norm": 1.8084361553192139, "learning_rate": 7.74079949264467e-06, "loss": 0.6185, "step": 6851 }, { "epoch": 0.48236536430834215, "grad_norm": 1.7718815803527832, "learning_rate": 7.739212330491066e-06, "loss": 0.6956, "step": 6852 }, { "epoch": 0.4824357620556142, "grad_norm": 2.0361151695251465, "learning_rate": 7.737625129905142e-06, "loss": 0.6327, "step": 6853 }, { "epoch": 0.4825061598028863, "grad_norm": 1.7237802743911743, "learning_rate": 7.73603789096942e-06, "loss": 0.6821, "step": 6854 }, { "epoch": 0.4825765575501584, "grad_norm": 1.6389198303222656, "learning_rate": 7.73445061376642e-06, "loss": 0.745, "step": 6855 }, { "epoch": 0.48264695529743046, "grad_norm": 3.436924934387207, "learning_rate": 7.732863298378671e-06, "loss": 0.6952, "step": 6856 }, { "epoch": 0.4827173530447026, "grad_norm": 2.052011013031006, "learning_rate": 7.731275944888692e-06, "loss": 0.6784, "step": 6857 }, { "epoch": 0.48278775079197467, "grad_norm": 2.1205978393554688, "learning_rate": 7.729688553379015e-06, "loss": 0.6754, "step": 6858 }, { "epoch": 0.48285814853924675, "grad_norm": 1.6376497745513916, "learning_rate": 7.728101123932171e-06, "loss": 0.656, "step": 6859 }, { "epoch": 0.4829285462865188, "grad_norm": 1.5531662702560425, "learning_rate": 7.726513656630688e-06, "loss": 0.641, "step": 6860 }, { "epoch": 0.4829989440337909, "grad_norm": 1.5313804149627686, "learning_rate": 7.724926151557102e-06, "loss": 0.7632, "step": 6861 }, { "epoch": 0.48306934178106303, "grad_norm": 1.958071231842041, "learning_rate": 7.72333860879395e-06, "loss": 0.6493, "step": 6862 }, { "epoch": 0.4831397395283351, "grad_norm": 1.802428960800171, "learning_rate": 7.72175102842377e-06, "loss": 0.635, "step": 6863 }, { "epoch": 0.4832101372756072, "grad_norm": 1.6875947713851929, "learning_rate": 7.720163410529097e-06, "loss": 0.6771, "step": 6864 }, { "epoch": 0.48328053502287927, "grad_norm": 2.0611698627471924, "learning_rate": 7.718575755192478e-06, "loss": 0.6702, "step": 6865 }, { "epoch": 0.48335093277015134, "grad_norm": 1.6572887897491455, "learning_rate": 7.716988062496454e-06, "loss": 0.6124, "step": 6866 }, { "epoch": 0.4834213305174234, "grad_norm": 1.5559699535369873, "learning_rate": 7.71540033252357e-06, "loss": 0.7831, "step": 6867 }, { "epoch": 0.48349172826469555, "grad_norm": 1.7959307432174683, "learning_rate": 7.71381256535638e-06, "loss": 0.6801, "step": 6868 }, { "epoch": 0.48356212601196763, "grad_norm": 1.8208023309707642, "learning_rate": 7.712224761077424e-06, "loss": 0.7323, "step": 6869 }, { "epoch": 0.4836325237592397, "grad_norm": 1.8582745790481567, "learning_rate": 7.710636919769258e-06, "loss": 0.6894, "step": 6870 }, { "epoch": 0.4837029215065118, "grad_norm": 1.5149494409561157, "learning_rate": 7.709049041514436e-06, "loss": 0.7089, "step": 6871 }, { "epoch": 0.48377331925378386, "grad_norm": 1.8558614253997803, "learning_rate": 7.70746112639551e-06, "loss": 0.6119, "step": 6872 }, { "epoch": 0.48384371700105594, "grad_norm": 1.5925757884979248, "learning_rate": 7.705873174495041e-06, "loss": 0.5966, "step": 6873 }, { "epoch": 0.4839141147483281, "grad_norm": 2.1561038494110107, "learning_rate": 7.704285185895587e-06, "loss": 0.7329, "step": 6874 }, { "epoch": 0.48398451249560015, "grad_norm": 1.8409711122512817, "learning_rate": 7.702697160679709e-06, "loss": 0.6189, "step": 6875 }, { "epoch": 0.4840549102428722, "grad_norm": 2.340301036834717, "learning_rate": 7.701109098929966e-06, "loss": 0.7196, "step": 6876 }, { "epoch": 0.4841253079901443, "grad_norm": 1.6583633422851562, "learning_rate": 7.69952100072893e-06, "loss": 0.6701, "step": 6877 }, { "epoch": 0.4841957057374164, "grad_norm": 1.9281015396118164, "learning_rate": 7.697932866159162e-06, "loss": 0.7482, "step": 6878 }, { "epoch": 0.4842661034846885, "grad_norm": 1.9697331190109253, "learning_rate": 7.69634469530323e-06, "loss": 0.5796, "step": 6879 }, { "epoch": 0.4843365012319606, "grad_norm": 2.0149383544921875, "learning_rate": 7.694756488243707e-06, "loss": 0.7254, "step": 6880 }, { "epoch": 0.48440689897923267, "grad_norm": 1.7193833589553833, "learning_rate": 7.693168245063168e-06, "loss": 0.6366, "step": 6881 }, { "epoch": 0.48447729672650475, "grad_norm": 2.179192304611206, "learning_rate": 7.69157996584418e-06, "loss": 0.7634, "step": 6882 }, { "epoch": 0.4845476944737768, "grad_norm": 1.7407256364822388, "learning_rate": 7.689991650669327e-06, "loss": 0.6231, "step": 6883 }, { "epoch": 0.4846180922210489, "grad_norm": 1.9604324102401733, "learning_rate": 7.688403299621184e-06, "loss": 0.6798, "step": 6884 }, { "epoch": 0.48468848996832103, "grad_norm": 1.6788311004638672, "learning_rate": 7.686814912782327e-06, "loss": 0.6692, "step": 6885 }, { "epoch": 0.4847588877155931, "grad_norm": 1.578857660293579, "learning_rate": 7.685226490235341e-06, "loss": 0.6573, "step": 6886 }, { "epoch": 0.4848292854628652, "grad_norm": 1.9187318086624146, "learning_rate": 7.68363803206281e-06, "loss": 0.7205, "step": 6887 }, { "epoch": 0.48489968321013727, "grad_norm": 1.5444285869598389, "learning_rate": 7.682049538347318e-06, "loss": 0.645, "step": 6888 }, { "epoch": 0.48497008095740934, "grad_norm": 1.7999136447906494, "learning_rate": 7.680461009171453e-06, "loss": 0.7056, "step": 6889 }, { "epoch": 0.4850404787046815, "grad_norm": 1.5475364923477173, "learning_rate": 7.678872444617803e-06, "loss": 0.6521, "step": 6890 }, { "epoch": 0.48511087645195355, "grad_norm": 1.6403950452804565, "learning_rate": 7.67728384476896e-06, "loss": 0.7125, "step": 6891 }, { "epoch": 0.48518127419922563, "grad_norm": 1.7675493955612183, "learning_rate": 7.675695209707519e-06, "loss": 0.6563, "step": 6892 }, { "epoch": 0.4852516719464977, "grad_norm": 2.0272488594055176, "learning_rate": 7.67410653951607e-06, "loss": 0.6811, "step": 6893 }, { "epoch": 0.4853220696937698, "grad_norm": 2.0071732997894287, "learning_rate": 7.672517834277212e-06, "loss": 0.7213, "step": 6894 }, { "epoch": 0.48539246744104186, "grad_norm": 1.946776270866394, "learning_rate": 7.670929094073543e-06, "loss": 0.6716, "step": 6895 }, { "epoch": 0.485462865188314, "grad_norm": 2.1968894004821777, "learning_rate": 7.669340318987662e-06, "loss": 0.6047, "step": 6896 }, { "epoch": 0.4855332629355861, "grad_norm": 1.7178544998168945, "learning_rate": 7.66775150910217e-06, "loss": 0.6137, "step": 6897 }, { "epoch": 0.48560366068285815, "grad_norm": 1.840998888015747, "learning_rate": 7.666162664499677e-06, "loss": 0.6572, "step": 6898 }, { "epoch": 0.48567405843013023, "grad_norm": 1.6250286102294922, "learning_rate": 7.66457378526278e-06, "loss": 0.6604, "step": 6899 }, { "epoch": 0.4857444561774023, "grad_norm": 1.6378756761550903, "learning_rate": 7.66298487147409e-06, "loss": 0.7363, "step": 6900 }, { "epoch": 0.4858148539246744, "grad_norm": 1.3954441547393799, "learning_rate": 7.66139592321622e-06, "loss": 0.7974, "step": 6901 }, { "epoch": 0.4858852516719465, "grad_norm": 2.1520020961761475, "learning_rate": 7.659806940571774e-06, "loss": 0.6705, "step": 6902 }, { "epoch": 0.4859556494192186, "grad_norm": 1.862487554550171, "learning_rate": 7.658217923623368e-06, "loss": 0.6746, "step": 6903 }, { "epoch": 0.48602604716649067, "grad_norm": 1.9999544620513916, "learning_rate": 7.656628872453614e-06, "loss": 0.5797, "step": 6904 }, { "epoch": 0.48609644491376275, "grad_norm": 2.0365641117095947, "learning_rate": 7.655039787145133e-06, "loss": 0.8008, "step": 6905 }, { "epoch": 0.4861668426610348, "grad_norm": 1.676377296447754, "learning_rate": 7.653450667780539e-06, "loss": 0.8005, "step": 6906 }, { "epoch": 0.48623724040830696, "grad_norm": 2.526132106781006, "learning_rate": 7.651861514442454e-06, "loss": 0.6714, "step": 6907 }, { "epoch": 0.48630763815557904, "grad_norm": 1.9236596822738647, "learning_rate": 7.650272327213497e-06, "loss": 0.6981, "step": 6908 }, { "epoch": 0.4863780359028511, "grad_norm": 1.7638916969299316, "learning_rate": 7.648683106176293e-06, "loss": 0.7815, "step": 6909 }, { "epoch": 0.4864484336501232, "grad_norm": 1.848482370376587, "learning_rate": 7.647093851413469e-06, "loss": 0.8307, "step": 6910 }, { "epoch": 0.48651883139739527, "grad_norm": 1.8696870803833008, "learning_rate": 7.645504563007647e-06, "loss": 0.6834, "step": 6911 }, { "epoch": 0.48658922914466735, "grad_norm": 1.6231231689453125, "learning_rate": 7.64391524104146e-06, "loss": 0.6434, "step": 6912 }, { "epoch": 0.4866596268919395, "grad_norm": 1.7697995901107788, "learning_rate": 7.642325885597535e-06, "loss": 0.5593, "step": 6913 }, { "epoch": 0.48673002463921156, "grad_norm": 1.7313284873962402, "learning_rate": 7.640736496758506e-06, "loss": 0.677, "step": 6914 }, { "epoch": 0.48680042238648363, "grad_norm": 1.7075622081756592, "learning_rate": 7.639147074607006e-06, "loss": 0.7872, "step": 6915 }, { "epoch": 0.4868708201337557, "grad_norm": 2.933905601501465, "learning_rate": 7.637557619225672e-06, "loss": 0.8435, "step": 6916 }, { "epoch": 0.4869412178810278, "grad_norm": 1.4933298826217651, "learning_rate": 7.63596813069714e-06, "loss": 0.6623, "step": 6917 }, { "epoch": 0.4870116156282999, "grad_norm": 1.659057378768921, "learning_rate": 7.634378609104046e-06, "loss": 0.6736, "step": 6918 }, { "epoch": 0.487082013375572, "grad_norm": 1.8535329103469849, "learning_rate": 7.632789054529038e-06, "loss": 0.678, "step": 6919 }, { "epoch": 0.4871524111228441, "grad_norm": 1.8676859140396118, "learning_rate": 7.631199467054751e-06, "loss": 0.6323, "step": 6920 }, { "epoch": 0.48722280887011615, "grad_norm": 1.7391880750656128, "learning_rate": 7.629609846763832e-06, "loss": 0.7512, "step": 6921 }, { "epoch": 0.48729320661738823, "grad_norm": 1.8021104335784912, "learning_rate": 7.628020193738928e-06, "loss": 0.6357, "step": 6922 }, { "epoch": 0.4873636043646603, "grad_norm": 1.9736992120742798, "learning_rate": 7.626430508062684e-06, "loss": 0.6906, "step": 6923 }, { "epoch": 0.48743400211193244, "grad_norm": 1.762525200843811, "learning_rate": 7.624840789817751e-06, "loss": 0.6331, "step": 6924 }, { "epoch": 0.4875043998592045, "grad_norm": 1.7422032356262207, "learning_rate": 7.6232510390867805e-06, "loss": 0.5511, "step": 6925 }, { "epoch": 0.4875747976064766, "grad_norm": 1.761390209197998, "learning_rate": 7.6216612559524225e-06, "loss": 0.6981, "step": 6926 }, { "epoch": 0.4876451953537487, "grad_norm": 1.712830662727356, "learning_rate": 7.620071440497334e-06, "loss": 0.7867, "step": 6927 }, { "epoch": 0.48771559310102075, "grad_norm": 1.6549065113067627, "learning_rate": 7.61848159280417e-06, "loss": 0.7743, "step": 6928 }, { "epoch": 0.4877859908482928, "grad_norm": 1.8456015586853027, "learning_rate": 7.616891712955587e-06, "loss": 0.7167, "step": 6929 }, { "epoch": 0.48785638859556496, "grad_norm": 1.6480827331542969, "learning_rate": 7.615301801034245e-06, "loss": 0.684, "step": 6930 }, { "epoch": 0.48792678634283704, "grad_norm": 2.430166244506836, "learning_rate": 7.613711857122806e-06, "loss": 0.6773, "step": 6931 }, { "epoch": 0.4879971840901091, "grad_norm": 1.9230659008026123, "learning_rate": 7.612121881303932e-06, "loss": 0.6003, "step": 6932 }, { "epoch": 0.4880675818373812, "grad_norm": 1.657749891281128, "learning_rate": 7.610531873660285e-06, "loss": 0.6788, "step": 6933 }, { "epoch": 0.48813797958465327, "grad_norm": 1.602983832359314, "learning_rate": 7.608941834274533e-06, "loss": 0.646, "step": 6934 }, { "epoch": 0.4882083773319254, "grad_norm": 1.9395595788955688, "learning_rate": 7.607351763229346e-06, "loss": 0.7313, "step": 6935 }, { "epoch": 0.4882787750791975, "grad_norm": 1.9432252645492554, "learning_rate": 7.605761660607389e-06, "loss": 0.6917, "step": 6936 }, { "epoch": 0.48834917282646956, "grad_norm": 1.5621521472930908, "learning_rate": 7.604171526491336e-06, "loss": 0.6543, "step": 6937 }, { "epoch": 0.48841957057374163, "grad_norm": 1.8826910257339478, "learning_rate": 7.602581360963857e-06, "loss": 0.6058, "step": 6938 }, { "epoch": 0.4884899683210137, "grad_norm": 1.727573037147522, "learning_rate": 7.600991164107628e-06, "loss": 0.6908, "step": 6939 }, { "epoch": 0.4885603660682858, "grad_norm": 1.6985597610473633, "learning_rate": 7.599400936005324e-06, "loss": 0.7127, "step": 6940 }, { "epoch": 0.4886307638155579, "grad_norm": 1.7893575429916382, "learning_rate": 7.597810676739622e-06, "loss": 0.7239, "step": 6941 }, { "epoch": 0.48870116156283, "grad_norm": 1.8316552639007568, "learning_rate": 7.596220386393201e-06, "loss": 0.7303, "step": 6942 }, { "epoch": 0.4887715593101021, "grad_norm": 1.83077871799469, "learning_rate": 7.594630065048743e-06, "loss": 0.776, "step": 6943 }, { "epoch": 0.48884195705737415, "grad_norm": 2.4817512035369873, "learning_rate": 7.593039712788929e-06, "loss": 0.7137, "step": 6944 }, { "epoch": 0.48891235480464623, "grad_norm": 1.9120087623596191, "learning_rate": 7.591449329696444e-06, "loss": 0.6934, "step": 6945 }, { "epoch": 0.48898275255191836, "grad_norm": 1.8362674713134766, "learning_rate": 7.589858915853973e-06, "loss": 0.5655, "step": 6946 }, { "epoch": 0.48905315029919044, "grad_norm": 1.8294564485549927, "learning_rate": 7.588268471344202e-06, "loss": 0.6199, "step": 6947 }, { "epoch": 0.4891235480464625, "grad_norm": 1.6041282415390015, "learning_rate": 7.586677996249819e-06, "loss": 0.6743, "step": 6948 }, { "epoch": 0.4891939457937346, "grad_norm": 1.961397647857666, "learning_rate": 7.585087490653518e-06, "loss": 0.6706, "step": 6949 }, { "epoch": 0.4892643435410067, "grad_norm": 1.68181574344635, "learning_rate": 7.583496954637987e-06, "loss": 0.6074, "step": 6950 }, { "epoch": 0.48933474128827875, "grad_norm": 1.444459319114685, "learning_rate": 7.581906388285921e-06, "loss": 0.7288, "step": 6951 }, { "epoch": 0.4894051390355509, "grad_norm": 1.653060793876648, "learning_rate": 7.580315791680016e-06, "loss": 0.6949, "step": 6952 }, { "epoch": 0.48947553678282296, "grad_norm": 1.556343674659729, "learning_rate": 7.578725164902966e-06, "loss": 0.7237, "step": 6953 }, { "epoch": 0.48954593453009504, "grad_norm": 2.093841075897217, "learning_rate": 7.577134508037472e-06, "loss": 0.7744, "step": 6954 }, { "epoch": 0.4896163322773671, "grad_norm": 1.8060798645019531, "learning_rate": 7.575543821166231e-06, "loss": 0.7314, "step": 6955 }, { "epoch": 0.4896867300246392, "grad_norm": 2.0685713291168213, "learning_rate": 7.573953104371947e-06, "loss": 0.6582, "step": 6956 }, { "epoch": 0.48975712777191127, "grad_norm": 1.8766186237335205, "learning_rate": 7.572362357737319e-06, "loss": 0.6442, "step": 6957 }, { "epoch": 0.4898275255191834, "grad_norm": 1.9173319339752197, "learning_rate": 7.570771581345056e-06, "loss": 0.6777, "step": 6958 }, { "epoch": 0.4898979232664555, "grad_norm": 1.9082928895950317, "learning_rate": 7.56918077527786e-06, "loss": 0.6737, "step": 6959 }, { "epoch": 0.48996832101372756, "grad_norm": 2.4666640758514404, "learning_rate": 7.56758993961844e-06, "loss": 0.5912, "step": 6960 }, { "epoch": 0.49003871876099964, "grad_norm": 2.418625831604004, "learning_rate": 7.565999074449507e-06, "loss": 0.7114, "step": 6961 }, { "epoch": 0.4901091165082717, "grad_norm": 2.5650789737701416, "learning_rate": 7.564408179853767e-06, "loss": 0.7534, "step": 6962 }, { "epoch": 0.49017951425554385, "grad_norm": 1.9662985801696777, "learning_rate": 7.562817255913936e-06, "loss": 0.6746, "step": 6963 }, { "epoch": 0.4902499120028159, "grad_norm": 2.0140910148620605, "learning_rate": 7.561226302712726e-06, "loss": 0.7799, "step": 6964 }, { "epoch": 0.490320309750088, "grad_norm": 1.69057035446167, "learning_rate": 7.559635320332852e-06, "loss": 0.8064, "step": 6965 }, { "epoch": 0.4903907074973601, "grad_norm": 1.8184431791305542, "learning_rate": 7.558044308857031e-06, "loss": 0.6659, "step": 6966 }, { "epoch": 0.49046110524463216, "grad_norm": 1.8598313331604004, "learning_rate": 7.5564532683679815e-06, "loss": 0.6189, "step": 6967 }, { "epoch": 0.49053150299190423, "grad_norm": 1.597071886062622, "learning_rate": 7.5548621989484225e-06, "loss": 0.7868, "step": 6968 }, { "epoch": 0.49060190073917637, "grad_norm": 1.860912799835205, "learning_rate": 7.553271100681074e-06, "loss": 0.7072, "step": 6969 }, { "epoch": 0.49067229848644844, "grad_norm": 1.5628958940505981, "learning_rate": 7.551679973648661e-06, "loss": 0.6853, "step": 6970 }, { "epoch": 0.4907426962337205, "grad_norm": 2.131815195083618, "learning_rate": 7.550088817933906e-06, "loss": 0.7127, "step": 6971 }, { "epoch": 0.4908130939809926, "grad_norm": 2.1424057483673096, "learning_rate": 7.548497633619535e-06, "loss": 0.6896, "step": 6972 }, { "epoch": 0.4908834917282647, "grad_norm": 2.0388107299804688, "learning_rate": 7.5469064207882765e-06, "loss": 0.7331, "step": 6973 }, { "epoch": 0.4909538894755368, "grad_norm": 1.7795956134796143, "learning_rate": 7.545315179522857e-06, "loss": 0.6353, "step": 6974 }, { "epoch": 0.4910242872228089, "grad_norm": 2.7227673530578613, "learning_rate": 7.543723909906007e-06, "loss": 0.6363, "step": 6975 }, { "epoch": 0.49109468497008096, "grad_norm": 2.2361767292022705, "learning_rate": 7.54213261202046e-06, "loss": 0.7322, "step": 6976 }, { "epoch": 0.49116508271735304, "grad_norm": 2.2187559604644775, "learning_rate": 7.540541285948946e-06, "loss": 0.7864, "step": 6977 }, { "epoch": 0.4912354804646251, "grad_norm": 1.861690878868103, "learning_rate": 7.538949931774201e-06, "loss": 0.626, "step": 6978 }, { "epoch": 0.4913058782118972, "grad_norm": 1.9184237718582153, "learning_rate": 7.537358549578963e-06, "loss": 0.6707, "step": 6979 }, { "epoch": 0.49137627595916933, "grad_norm": 1.7319180965423584, "learning_rate": 7.535767139445964e-06, "loss": 0.7638, "step": 6980 }, { "epoch": 0.4914466737064414, "grad_norm": 1.558050274848938, "learning_rate": 7.534175701457948e-06, "loss": 0.7383, "step": 6981 }, { "epoch": 0.4915170714537135, "grad_norm": 1.718546986579895, "learning_rate": 7.532584235697655e-06, "loss": 0.6426, "step": 6982 }, { "epoch": 0.49158746920098556, "grad_norm": 1.888731598854065, "learning_rate": 7.530992742247823e-06, "loss": 0.7611, "step": 6983 }, { "epoch": 0.49165786694825764, "grad_norm": 1.664847493171692, "learning_rate": 7.529401221191198e-06, "loss": 0.7214, "step": 6984 }, { "epoch": 0.4917282646955297, "grad_norm": 1.8096699714660645, "learning_rate": 7.527809672610523e-06, "loss": 0.7514, "step": 6985 }, { "epoch": 0.49179866244280185, "grad_norm": 1.9542627334594727, "learning_rate": 7.526218096588547e-06, "loss": 0.7228, "step": 6986 }, { "epoch": 0.4918690601900739, "grad_norm": 2.019768476486206, "learning_rate": 7.524626493208015e-06, "loss": 0.6225, "step": 6987 }, { "epoch": 0.491939457937346, "grad_norm": 2.2046096324920654, "learning_rate": 7.523034862551677e-06, "loss": 0.7635, "step": 6988 }, { "epoch": 0.4920098556846181, "grad_norm": 2.4870824813842773, "learning_rate": 7.521443204702283e-06, "loss": 0.7522, "step": 6989 }, { "epoch": 0.49208025343189016, "grad_norm": 2.8132431507110596, "learning_rate": 7.519851519742582e-06, "loss": 0.7137, "step": 6990 }, { "epoch": 0.4921506511791623, "grad_norm": 2.0460615158081055, "learning_rate": 7.5182598077553315e-06, "loss": 0.6913, "step": 6991 }, { "epoch": 0.49222104892643437, "grad_norm": 2.3621294498443604, "learning_rate": 7.516668068823283e-06, "loss": 0.5993, "step": 6992 }, { "epoch": 0.49229144667370645, "grad_norm": 1.657876968383789, "learning_rate": 7.515076303029196e-06, "loss": 0.6493, "step": 6993 }, { "epoch": 0.4923618444209785, "grad_norm": 1.7639286518096924, "learning_rate": 7.513484510455824e-06, "loss": 0.6446, "step": 6994 }, { "epoch": 0.4924322421682506, "grad_norm": 1.784130334854126, "learning_rate": 7.511892691185927e-06, "loss": 0.6923, "step": 6995 }, { "epoch": 0.4925026399155227, "grad_norm": 1.6949496269226074, "learning_rate": 7.5103008453022666e-06, "loss": 0.6911, "step": 6996 }, { "epoch": 0.4925730376627948, "grad_norm": 2.0152041912078857, "learning_rate": 7.508708972887602e-06, "loss": 0.6932, "step": 6997 }, { "epoch": 0.4926434354100669, "grad_norm": 2.474036931991577, "learning_rate": 7.507117074024698e-06, "loss": 0.7901, "step": 6998 }, { "epoch": 0.49271383315733897, "grad_norm": 1.7222814559936523, "learning_rate": 7.505525148796318e-06, "loss": 0.678, "step": 6999 }, { "epoch": 0.49278423090461104, "grad_norm": 1.8365870714187622, "learning_rate": 7.5039331972852285e-06, "loss": 0.71, "step": 7000 }, { "epoch": 0.4928546286518831, "grad_norm": 2.084662675857544, "learning_rate": 7.502341219574194e-06, "loss": 0.7748, "step": 7001 }, { "epoch": 0.49292502639915525, "grad_norm": 1.9396532773971558, "learning_rate": 7.500749215745985e-06, "loss": 0.8144, "step": 7002 }, { "epoch": 0.49299542414642733, "grad_norm": 1.605641484260559, "learning_rate": 7.499157185883372e-06, "loss": 0.8003, "step": 7003 }, { "epoch": 0.4930658218936994, "grad_norm": 1.9236613512039185, "learning_rate": 7.4975651300691225e-06, "loss": 0.6736, "step": 7004 }, { "epoch": 0.4931362196409715, "grad_norm": 2.010066032409668, "learning_rate": 7.495973048386013e-06, "loss": 0.633, "step": 7005 }, { "epoch": 0.49320661738824356, "grad_norm": 2.140815258026123, "learning_rate": 7.494380940916816e-06, "loss": 0.7284, "step": 7006 }, { "epoch": 0.49327701513551564, "grad_norm": 1.5142797231674194, "learning_rate": 7.492788807744304e-06, "loss": 0.6917, "step": 7007 }, { "epoch": 0.4933474128827878, "grad_norm": 1.7790122032165527, "learning_rate": 7.491196648951256e-06, "loss": 0.7207, "step": 7008 }, { "epoch": 0.49341781063005985, "grad_norm": 1.7448272705078125, "learning_rate": 7.489604464620452e-06, "loss": 0.6599, "step": 7009 }, { "epoch": 0.4934882083773319, "grad_norm": 1.745219111442566, "learning_rate": 7.488012254834666e-06, "loss": 0.715, "step": 7010 }, { "epoch": 0.493558606124604, "grad_norm": 2.1835439205169678, "learning_rate": 7.486420019676681e-06, "loss": 0.6204, "step": 7011 }, { "epoch": 0.4936290038718761, "grad_norm": 1.7460306882858276, "learning_rate": 7.48482775922928e-06, "loss": 0.7477, "step": 7012 }, { "epoch": 0.4936994016191482, "grad_norm": 1.6942793130874634, "learning_rate": 7.483235473575244e-06, "loss": 0.7429, "step": 7013 }, { "epoch": 0.4937697993664203, "grad_norm": 1.651778221130371, "learning_rate": 7.481643162797356e-06, "loss": 0.6965, "step": 7014 }, { "epoch": 0.49384019711369237, "grad_norm": 1.656455397605896, "learning_rate": 7.480050826978408e-06, "loss": 0.6788, "step": 7015 }, { "epoch": 0.49391059486096445, "grad_norm": 1.9954520463943481, "learning_rate": 7.478458466201181e-06, "loss": 0.7843, "step": 7016 }, { "epoch": 0.4939809926082365, "grad_norm": 1.6015024185180664, "learning_rate": 7.476866080548464e-06, "loss": 0.7366, "step": 7017 }, { "epoch": 0.4940513903555086, "grad_norm": 2.033125877380371, "learning_rate": 7.475273670103047e-06, "loss": 0.6911, "step": 7018 }, { "epoch": 0.49412178810278073, "grad_norm": 1.6838514804840088, "learning_rate": 7.4736812349477244e-06, "loss": 0.6515, "step": 7019 }, { "epoch": 0.4941921858500528, "grad_norm": 1.943515658378601, "learning_rate": 7.472088775165283e-06, "loss": 0.6577, "step": 7020 }, { "epoch": 0.4942625835973249, "grad_norm": 1.4152088165283203, "learning_rate": 7.470496290838519e-06, "loss": 0.6022, "step": 7021 }, { "epoch": 0.49433298134459697, "grad_norm": 1.5996613502502441, "learning_rate": 7.4689037820502275e-06, "loss": 0.6404, "step": 7022 }, { "epoch": 0.49440337909186904, "grad_norm": 1.835863709449768, "learning_rate": 7.467311248883202e-06, "loss": 0.7183, "step": 7023 }, { "epoch": 0.4944737768391411, "grad_norm": 1.6736773252487183, "learning_rate": 7.465718691420244e-06, "loss": 0.711, "step": 7024 }, { "epoch": 0.49454417458641325, "grad_norm": 1.7038923501968384, "learning_rate": 7.464126109744148e-06, "loss": 0.6618, "step": 7025 }, { "epoch": 0.49461457233368533, "grad_norm": 1.5762677192687988, "learning_rate": 7.462533503937715e-06, "loss": 0.6587, "step": 7026 }, { "epoch": 0.4946849700809574, "grad_norm": 1.9460824728012085, "learning_rate": 7.460940874083747e-06, "loss": 0.6836, "step": 7027 }, { "epoch": 0.4947553678282295, "grad_norm": 1.8446485996246338, "learning_rate": 7.459348220265044e-06, "loss": 0.7682, "step": 7028 }, { "epoch": 0.49482576557550156, "grad_norm": 1.7170295715332031, "learning_rate": 7.457755542564412e-06, "loss": 0.682, "step": 7029 }, { "epoch": 0.4948961633227737, "grad_norm": 2.0113134384155273, "learning_rate": 7.456162841064656e-06, "loss": 0.7363, "step": 7030 }, { "epoch": 0.4949665610700458, "grad_norm": 1.9247658252716064, "learning_rate": 7.454570115848579e-06, "loss": 0.6938, "step": 7031 }, { "epoch": 0.49503695881731785, "grad_norm": 1.763378381729126, "learning_rate": 7.45297736699899e-06, "loss": 0.6336, "step": 7032 }, { "epoch": 0.49510735656458993, "grad_norm": 1.8367911577224731, "learning_rate": 7.4513845945987e-06, "loss": 0.5711, "step": 7033 }, { "epoch": 0.495177754311862, "grad_norm": 1.6368985176086426, "learning_rate": 7.449791798730513e-06, "loss": 0.6775, "step": 7034 }, { "epoch": 0.4952481520591341, "grad_norm": 1.787818193435669, "learning_rate": 7.448198979477244e-06, "loss": 0.6278, "step": 7035 }, { "epoch": 0.4953185498064062, "grad_norm": 1.6443394422531128, "learning_rate": 7.446606136921704e-06, "loss": 0.7414, "step": 7036 }, { "epoch": 0.4953889475536783, "grad_norm": 1.5530809164047241, "learning_rate": 7.445013271146707e-06, "loss": 0.5143, "step": 7037 }, { "epoch": 0.49545934530095037, "grad_norm": 1.9387260675430298, "learning_rate": 7.443420382235066e-06, "loss": 0.6558, "step": 7038 }, { "epoch": 0.49552974304822245, "grad_norm": 2.3580756187438965, "learning_rate": 7.441827470269599e-06, "loss": 0.7611, "step": 7039 }, { "epoch": 0.4956001407954945, "grad_norm": 1.61228609085083, "learning_rate": 7.44023453533312e-06, "loss": 0.671, "step": 7040 }, { "epoch": 0.49567053854276666, "grad_norm": 1.9537783861160278, "learning_rate": 7.43864157750845e-06, "loss": 0.6387, "step": 7041 }, { "epoch": 0.49574093629003874, "grad_norm": 1.6002590656280518, "learning_rate": 7.437048596878408e-06, "loss": 0.6516, "step": 7042 }, { "epoch": 0.4958113340373108, "grad_norm": 1.6247228384017944, "learning_rate": 7.435455593525813e-06, "loss": 0.7215, "step": 7043 }, { "epoch": 0.4958817317845829, "grad_norm": 1.460218071937561, "learning_rate": 7.433862567533487e-06, "loss": 0.7531, "step": 7044 }, { "epoch": 0.49595212953185497, "grad_norm": 1.7335447072982788, "learning_rate": 7.4322695189842546e-06, "loss": 0.644, "step": 7045 }, { "epoch": 0.49602252727912705, "grad_norm": 2.318429946899414, "learning_rate": 7.430676447960935e-06, "loss": 0.6135, "step": 7046 }, { "epoch": 0.4960929250263992, "grad_norm": 1.9972351789474487, "learning_rate": 7.429083354546358e-06, "loss": 0.6568, "step": 7047 }, { "epoch": 0.49616332277367126, "grad_norm": 1.7915931940078735, "learning_rate": 7.4274902388233524e-06, "loss": 0.7076, "step": 7048 }, { "epoch": 0.49623372052094333, "grad_norm": 1.8487696647644043, "learning_rate": 7.425897100874738e-06, "loss": 0.6255, "step": 7049 }, { "epoch": 0.4963041182682154, "grad_norm": 2.192624092102051, "learning_rate": 7.4243039407833485e-06, "loss": 0.6028, "step": 7050 }, { "epoch": 0.4963745160154875, "grad_norm": 1.9773788452148438, "learning_rate": 7.422710758632014e-06, "loss": 0.6316, "step": 7051 }, { "epoch": 0.49644491376275957, "grad_norm": 1.8058468103408813, "learning_rate": 7.421117554503564e-06, "loss": 0.6866, "step": 7052 }, { "epoch": 0.4965153115100317, "grad_norm": 1.97904634475708, "learning_rate": 7.41952432848083e-06, "loss": 0.6849, "step": 7053 }, { "epoch": 0.4965857092573038, "grad_norm": 2.0759966373443604, "learning_rate": 7.417931080646648e-06, "loss": 0.6282, "step": 7054 }, { "epoch": 0.49665610700457585, "grad_norm": 2.0426182746887207, "learning_rate": 7.416337811083849e-06, "loss": 0.6633, "step": 7055 }, { "epoch": 0.49672650475184793, "grad_norm": 1.7802667617797852, "learning_rate": 7.414744519875271e-06, "loss": 0.621, "step": 7056 }, { "epoch": 0.49679690249912, "grad_norm": 1.903968095779419, "learning_rate": 7.41315120710375e-06, "loss": 0.7276, "step": 7057 }, { "epoch": 0.49686730024639214, "grad_norm": 1.7801588773727417, "learning_rate": 7.411557872852122e-06, "loss": 0.7051, "step": 7058 }, { "epoch": 0.4969376979936642, "grad_norm": 2.7071752548217773, "learning_rate": 7.409964517203229e-06, "loss": 0.7603, "step": 7059 }, { "epoch": 0.4970080957409363, "grad_norm": 1.6559593677520752, "learning_rate": 7.408371140239909e-06, "loss": 0.6092, "step": 7060 }, { "epoch": 0.4970784934882084, "grad_norm": 1.9004149436950684, "learning_rate": 7.4067777420450045e-06, "loss": 0.6834, "step": 7061 }, { "epoch": 0.49714889123548045, "grad_norm": 2.0391743183135986, "learning_rate": 7.405184322701355e-06, "loss": 0.7472, "step": 7062 }, { "epoch": 0.4972192889827525, "grad_norm": 1.5938483476638794, "learning_rate": 7.4035908822918066e-06, "loss": 0.6972, "step": 7063 }, { "epoch": 0.49728968673002466, "grad_norm": 1.5913429260253906, "learning_rate": 7.4019974208992025e-06, "loss": 0.6414, "step": 7064 }, { "epoch": 0.49736008447729674, "grad_norm": 1.9855729341506958, "learning_rate": 7.400403938606389e-06, "loss": 0.6589, "step": 7065 }, { "epoch": 0.4974304822245688, "grad_norm": 1.7035506963729858, "learning_rate": 7.398810435496212e-06, "loss": 0.8007, "step": 7066 }, { "epoch": 0.4975008799718409, "grad_norm": 2.010617971420288, "learning_rate": 7.397216911651519e-06, "loss": 0.6365, "step": 7067 }, { "epoch": 0.49757127771911297, "grad_norm": 1.7975099086761475, "learning_rate": 7.395623367155158e-06, "loss": 0.7183, "step": 7068 }, { "epoch": 0.4976416754663851, "grad_norm": 1.8351850509643555, "learning_rate": 7.394029802089982e-06, "loss": 0.7388, "step": 7069 }, { "epoch": 0.4977120732136572, "grad_norm": 1.765515685081482, "learning_rate": 7.392436216538837e-06, "loss": 0.7655, "step": 7070 }, { "epoch": 0.49778247096092926, "grad_norm": 1.8757396936416626, "learning_rate": 7.390842610584579e-06, "loss": 0.7279, "step": 7071 }, { "epoch": 0.49785286870820133, "grad_norm": 2.2329275608062744, "learning_rate": 7.389248984310062e-06, "loss": 0.7842, "step": 7072 }, { "epoch": 0.4979232664554734, "grad_norm": 1.792484164237976, "learning_rate": 7.387655337798135e-06, "loss": 0.7458, "step": 7073 }, { "epoch": 0.4979936642027455, "grad_norm": 1.526589274406433, "learning_rate": 7.386061671131656e-06, "loss": 0.6638, "step": 7074 }, { "epoch": 0.4980640619500176, "grad_norm": 1.6607707738876343, "learning_rate": 7.384467984393482e-06, "loss": 0.6695, "step": 7075 }, { "epoch": 0.4981344596972897, "grad_norm": 1.6075760126113892, "learning_rate": 7.382874277666468e-06, "loss": 0.773, "step": 7076 }, { "epoch": 0.4982048574445618, "grad_norm": 1.5864406824111938, "learning_rate": 7.381280551033475e-06, "loss": 0.5967, "step": 7077 }, { "epoch": 0.49827525519183385, "grad_norm": 1.5365228652954102, "learning_rate": 7.379686804577361e-06, "loss": 0.5035, "step": 7078 }, { "epoch": 0.49834565293910593, "grad_norm": 1.7556229829788208, "learning_rate": 7.378093038380986e-06, "loss": 0.7142, "step": 7079 }, { "epoch": 0.498416050686378, "grad_norm": 1.8772276639938354, "learning_rate": 7.37649925252721e-06, "loss": 0.6101, "step": 7080 }, { "epoch": 0.49848644843365014, "grad_norm": 1.6779102087020874, "learning_rate": 7.374905447098898e-06, "loss": 0.6835, "step": 7081 }, { "epoch": 0.4985568461809222, "grad_norm": 1.6635017395019531, "learning_rate": 7.373311622178911e-06, "loss": 0.7937, "step": 7082 }, { "epoch": 0.4986272439281943, "grad_norm": 1.9517468214035034, "learning_rate": 7.3717177778501175e-06, "loss": 0.6826, "step": 7083 }, { "epoch": 0.4986976416754664, "grad_norm": 1.8456121683120728, "learning_rate": 7.3701239141953776e-06, "loss": 0.7476, "step": 7084 }, { "epoch": 0.49876803942273845, "grad_norm": 1.8652701377868652, "learning_rate": 7.368530031297561e-06, "loss": 0.7342, "step": 7085 }, { "epoch": 0.4988384371700106, "grad_norm": 1.729322910308838, "learning_rate": 7.366936129239534e-06, "loss": 0.6523, "step": 7086 }, { "epoch": 0.49890883491728266, "grad_norm": 1.5562351942062378, "learning_rate": 7.3653422081041675e-06, "loss": 0.6687, "step": 7087 }, { "epoch": 0.49897923266455474, "grad_norm": 2.2158565521240234, "learning_rate": 7.363748267974326e-06, "loss": 0.6906, "step": 7088 }, { "epoch": 0.4990496304118268, "grad_norm": 1.9811062812805176, "learning_rate": 7.362154308932883e-06, "loss": 0.7286, "step": 7089 }, { "epoch": 0.4991200281590989, "grad_norm": 1.9973886013031006, "learning_rate": 7.36056033106271e-06, "loss": 0.7246, "step": 7090 }, { "epoch": 0.49919042590637097, "grad_norm": 1.643250584602356, "learning_rate": 7.358966334446677e-06, "loss": 0.7212, "step": 7091 }, { "epoch": 0.4992608236536431, "grad_norm": 1.6952171325683594, "learning_rate": 7.357372319167662e-06, "loss": 0.6646, "step": 7092 }, { "epoch": 0.4993312214009152, "grad_norm": 1.6178512573242188, "learning_rate": 7.355778285308537e-06, "loss": 0.6648, "step": 7093 }, { "epoch": 0.49940161914818726, "grad_norm": 1.6944774389266968, "learning_rate": 7.354184232952174e-06, "loss": 0.7332, "step": 7094 }, { "epoch": 0.49947201689545934, "grad_norm": 1.8400647640228271, "learning_rate": 7.3525901621814525e-06, "loss": 0.8006, "step": 7095 }, { "epoch": 0.4995424146427314, "grad_norm": 1.9879798889160156, "learning_rate": 7.35099607307925e-06, "loss": 0.65, "step": 7096 }, { "epoch": 0.49961281239000355, "grad_norm": 1.9663934707641602, "learning_rate": 7.349401965728444e-06, "loss": 0.7327, "step": 7097 }, { "epoch": 0.4996832101372756, "grad_norm": 1.7615514993667603, "learning_rate": 7.347807840211912e-06, "loss": 0.6439, "step": 7098 }, { "epoch": 0.4997536078845477, "grad_norm": 1.9616082906723022, "learning_rate": 7.346213696612538e-06, "loss": 0.729, "step": 7099 }, { "epoch": 0.4998240056318198, "grad_norm": 1.9506783485412598, "learning_rate": 7.344619535013199e-06, "loss": 0.632, "step": 7100 }, { "epoch": 0.49989440337909186, "grad_norm": 1.507854700088501, "learning_rate": 7.343025355496777e-06, "loss": 0.7618, "step": 7101 }, { "epoch": 0.49996480112636393, "grad_norm": 1.787569284439087, "learning_rate": 7.3414311581461575e-06, "loss": 0.7279, "step": 7102 }, { "epoch": 0.500035198873636, "grad_norm": 1.6318904161453247, "learning_rate": 7.339836943044222e-06, "loss": 0.6585, "step": 7103 }, { "epoch": 0.5001055966209081, "grad_norm": 1.770500659942627, "learning_rate": 7.338242710273856e-06, "loss": 0.7299, "step": 7104 }, { "epoch": 0.5001759943681802, "grad_norm": 1.5977431535720825, "learning_rate": 7.3366484599179464e-06, "loss": 0.6059, "step": 7105 }, { "epoch": 0.5002463921154523, "grad_norm": 1.6358646154403687, "learning_rate": 7.335054192059379e-06, "loss": 0.7664, "step": 7106 }, { "epoch": 0.5003167898627244, "grad_norm": 1.546375036239624, "learning_rate": 7.333459906781038e-06, "loss": 0.6073, "step": 7107 }, { "epoch": 0.5003871876099965, "grad_norm": 1.6986106634140015, "learning_rate": 7.331865604165817e-06, "loss": 0.6012, "step": 7108 }, { "epoch": 0.5004575853572686, "grad_norm": 1.9632481336593628, "learning_rate": 7.330271284296601e-06, "loss": 0.7696, "step": 7109 }, { "epoch": 0.5005279831045406, "grad_norm": 1.8008967638015747, "learning_rate": 7.328676947256283e-06, "loss": 0.8308, "step": 7110 }, { "epoch": 0.5005983808518127, "grad_norm": 1.7403053045272827, "learning_rate": 7.327082593127753e-06, "loss": 0.6851, "step": 7111 }, { "epoch": 0.5006687785990849, "grad_norm": 1.5456092357635498, "learning_rate": 7.3254882219939e-06, "loss": 0.6895, "step": 7112 }, { "epoch": 0.5007391763463569, "grad_norm": 1.605785608291626, "learning_rate": 7.3238938339376225e-06, "loss": 0.619, "step": 7113 }, { "epoch": 0.500809574093629, "grad_norm": 1.5502943992614746, "learning_rate": 7.322299429041811e-06, "loss": 0.6959, "step": 7114 }, { "epoch": 0.500879971840901, "grad_norm": 1.7430022954940796, "learning_rate": 7.320705007389358e-06, "loss": 0.6782, "step": 7115 }, { "epoch": 0.5009503695881732, "grad_norm": 1.8654053211212158, "learning_rate": 7.319110569063163e-06, "loss": 0.75, "step": 7116 }, { "epoch": 0.5010207673354453, "grad_norm": 2.0523219108581543, "learning_rate": 7.317516114146122e-06, "loss": 0.8049, "step": 7117 }, { "epoch": 0.5010911650827173, "grad_norm": 1.8232492208480835, "learning_rate": 7.315921642721129e-06, "loss": 0.7184, "step": 7118 }, { "epoch": 0.5011615628299895, "grad_norm": 1.39983069896698, "learning_rate": 7.314327154871082e-06, "loss": 0.6538, "step": 7119 }, { "epoch": 0.5012319605772615, "grad_norm": 1.7261128425598145, "learning_rate": 7.312732650678884e-06, "loss": 0.755, "step": 7120 }, { "epoch": 0.5013023583245336, "grad_norm": 1.7712175846099854, "learning_rate": 7.311138130227431e-06, "loss": 0.7795, "step": 7121 }, { "epoch": 0.5013727560718056, "grad_norm": 1.8834385871887207, "learning_rate": 7.309543593599626e-06, "loss": 0.7372, "step": 7122 }, { "epoch": 0.5014431538190778, "grad_norm": 1.5355371236801147, "learning_rate": 7.307949040878369e-06, "loss": 0.6514, "step": 7123 }, { "epoch": 0.5015135515663499, "grad_norm": 1.7859551906585693, "learning_rate": 7.306354472146561e-06, "loss": 0.6314, "step": 7124 }, { "epoch": 0.5015839493136219, "grad_norm": 1.8737505674362183, "learning_rate": 7.304759887487109e-06, "loss": 0.6674, "step": 7125 }, { "epoch": 0.5016543470608941, "grad_norm": 1.6584560871124268, "learning_rate": 7.3031652869829134e-06, "loss": 0.6545, "step": 7126 }, { "epoch": 0.5017247448081661, "grad_norm": 1.7574843168258667, "learning_rate": 7.301570670716878e-06, "loss": 0.6592, "step": 7127 }, { "epoch": 0.5017951425554382, "grad_norm": 2.105433225631714, "learning_rate": 7.299976038771912e-06, "loss": 0.683, "step": 7128 }, { "epoch": 0.5018655403027104, "grad_norm": 1.6905643939971924, "learning_rate": 7.298381391230921e-06, "loss": 0.6744, "step": 7129 }, { "epoch": 0.5019359380499824, "grad_norm": 1.8170329332351685, "learning_rate": 7.29678672817681e-06, "loss": 0.5902, "step": 7130 }, { "epoch": 0.5020063357972545, "grad_norm": 1.7687439918518066, "learning_rate": 7.295192049692488e-06, "loss": 0.8027, "step": 7131 }, { "epoch": 0.5020767335445265, "grad_norm": 1.761724591255188, "learning_rate": 7.293597355860866e-06, "loss": 0.7348, "step": 7132 }, { "epoch": 0.5021471312917987, "grad_norm": 1.6922687292099, "learning_rate": 7.292002646764849e-06, "loss": 0.5966, "step": 7133 }, { "epoch": 0.5022175290390708, "grad_norm": 1.8522456884384155, "learning_rate": 7.29040792248735e-06, "loss": 0.69, "step": 7134 }, { "epoch": 0.5022879267863428, "grad_norm": 1.6940529346466064, "learning_rate": 7.288813183111283e-06, "loss": 0.6609, "step": 7135 }, { "epoch": 0.502358324533615, "grad_norm": 1.7849763631820679, "learning_rate": 7.2872184287195534e-06, "loss": 0.7766, "step": 7136 }, { "epoch": 0.502428722280887, "grad_norm": 1.767564058303833, "learning_rate": 7.285623659395079e-06, "loss": 0.7104, "step": 7137 }, { "epoch": 0.5024991200281591, "grad_norm": 1.7830849885940552, "learning_rate": 7.284028875220774e-06, "loss": 0.6966, "step": 7138 }, { "epoch": 0.5025695177754312, "grad_norm": 1.9503488540649414, "learning_rate": 7.282434076279548e-06, "loss": 0.6704, "step": 7139 }, { "epoch": 0.5026399155227033, "grad_norm": 1.7715520858764648, "learning_rate": 7.280839262654318e-06, "loss": 0.8012, "step": 7140 }, { "epoch": 0.5027103132699754, "grad_norm": 1.6110767126083374, "learning_rate": 7.279244434428002e-06, "loss": 0.6652, "step": 7141 }, { "epoch": 0.5027807110172474, "grad_norm": 2.196089506149292, "learning_rate": 7.2776495916835145e-06, "loss": 0.5907, "step": 7142 }, { "epoch": 0.5028511087645195, "grad_norm": 2.0235021114349365, "learning_rate": 7.276054734503773e-06, "loss": 0.7934, "step": 7143 }, { "epoch": 0.5029215065117916, "grad_norm": 2.0558760166168213, "learning_rate": 7.2744598629716966e-06, "loss": 0.6466, "step": 7144 }, { "epoch": 0.5029919042590637, "grad_norm": 2.148022413253784, "learning_rate": 7.272864977170202e-06, "loss": 0.7938, "step": 7145 }, { "epoch": 0.5030623020063358, "grad_norm": 1.7599444389343262, "learning_rate": 7.271270077182211e-06, "loss": 0.7471, "step": 7146 }, { "epoch": 0.5031326997536079, "grad_norm": 1.636151671409607, "learning_rate": 7.269675163090643e-06, "loss": 0.7012, "step": 7147 }, { "epoch": 0.50320309750088, "grad_norm": 1.57837975025177, "learning_rate": 7.268080234978419e-06, "loss": 0.6114, "step": 7148 }, { "epoch": 0.503273495248152, "grad_norm": 1.9540324211120605, "learning_rate": 7.266485292928461e-06, "loss": 0.7915, "step": 7149 }, { "epoch": 0.5033438929954241, "grad_norm": 1.7327295541763306, "learning_rate": 7.264890337023693e-06, "loss": 0.7472, "step": 7150 }, { "epoch": 0.5034142907426963, "grad_norm": 1.790114402770996, "learning_rate": 7.263295367347033e-06, "loss": 0.7328, "step": 7151 }, { "epoch": 0.5034846884899683, "grad_norm": 2.052381992340088, "learning_rate": 7.261700383981411e-06, "loss": 0.7211, "step": 7152 }, { "epoch": 0.5035550862372404, "grad_norm": 2.0784382820129395, "learning_rate": 7.260105387009749e-06, "loss": 0.7022, "step": 7153 }, { "epoch": 0.5036254839845125, "grad_norm": 2.3974592685699463, "learning_rate": 7.258510376514971e-06, "loss": 0.5957, "step": 7154 }, { "epoch": 0.5036958817317846, "grad_norm": 1.9683984518051147, "learning_rate": 7.256915352580003e-06, "loss": 0.606, "step": 7155 }, { "epoch": 0.5037662794790567, "grad_norm": 1.7287384271621704, "learning_rate": 7.255320315287777e-06, "loss": 0.7053, "step": 7156 }, { "epoch": 0.5038366772263287, "grad_norm": 1.772331953048706, "learning_rate": 7.2537252647212145e-06, "loss": 0.6359, "step": 7157 }, { "epoch": 0.5039070749736009, "grad_norm": 1.541143536567688, "learning_rate": 7.252130200963245e-06, "loss": 0.7639, "step": 7158 }, { "epoch": 0.5039774727208729, "grad_norm": 2.170760154724121, "learning_rate": 7.250535124096801e-06, "loss": 0.7871, "step": 7159 }, { "epoch": 0.504047870468145, "grad_norm": 1.9154008626937866, "learning_rate": 7.2489400342048044e-06, "loss": 0.6043, "step": 7160 }, { "epoch": 0.504118268215417, "grad_norm": 1.6673364639282227, "learning_rate": 7.247344931370193e-06, "loss": 0.7163, "step": 7161 }, { "epoch": 0.5041886659626892, "grad_norm": 1.8303697109222412, "learning_rate": 7.245749815675894e-06, "loss": 0.6836, "step": 7162 }, { "epoch": 0.5042590637099613, "grad_norm": 1.7102457284927368, "learning_rate": 7.24415468720484e-06, "loss": 0.7046, "step": 7163 }, { "epoch": 0.5043294614572333, "grad_norm": 1.73294198513031, "learning_rate": 7.242559546039962e-06, "loss": 0.7331, "step": 7164 }, { "epoch": 0.5043998592045055, "grad_norm": 1.9080764055252075, "learning_rate": 7.240964392264195e-06, "loss": 0.6396, "step": 7165 }, { "epoch": 0.5044702569517775, "grad_norm": 1.8342314958572388, "learning_rate": 7.239369225960469e-06, "loss": 0.7046, "step": 7166 }, { "epoch": 0.5045406546990496, "grad_norm": 1.6073322296142578, "learning_rate": 7.237774047211721e-06, "loss": 0.6448, "step": 7167 }, { "epoch": 0.5046110524463218, "grad_norm": 1.8853185176849365, "learning_rate": 7.236178856100886e-06, "loss": 0.6386, "step": 7168 }, { "epoch": 0.5046814501935938, "grad_norm": 1.8373249769210815, "learning_rate": 7.234583652710895e-06, "loss": 0.6923, "step": 7169 }, { "epoch": 0.5047518479408659, "grad_norm": 3.1050891876220703, "learning_rate": 7.232988437124689e-06, "loss": 0.8002, "step": 7170 }, { "epoch": 0.5048222456881379, "grad_norm": 1.8506361246109009, "learning_rate": 7.231393209425206e-06, "loss": 0.5963, "step": 7171 }, { "epoch": 0.5048926434354101, "grad_norm": 1.693061113357544, "learning_rate": 7.229797969695378e-06, "loss": 0.7014, "step": 7172 }, { "epoch": 0.5049630411826822, "grad_norm": 1.800511360168457, "learning_rate": 7.228202718018145e-06, "loss": 0.6652, "step": 7173 }, { "epoch": 0.5050334389299542, "grad_norm": 1.9508299827575684, "learning_rate": 7.226607454476448e-06, "loss": 0.6739, "step": 7174 }, { "epoch": 0.5051038366772264, "grad_norm": 1.8487663269042969, "learning_rate": 7.225012179153221e-06, "loss": 0.717, "step": 7175 }, { "epoch": 0.5051742344244984, "grad_norm": 1.779911756515503, "learning_rate": 7.2234168921314096e-06, "loss": 0.6634, "step": 7176 }, { "epoch": 0.5052446321717705, "grad_norm": 1.687700867652893, "learning_rate": 7.221821593493951e-06, "loss": 0.664, "step": 7177 }, { "epoch": 0.5053150299190425, "grad_norm": 1.9689269065856934, "learning_rate": 7.220226283323788e-06, "loss": 0.7029, "step": 7178 }, { "epoch": 0.5053854276663147, "grad_norm": 1.9033743143081665, "learning_rate": 7.21863096170386e-06, "loss": 0.8109, "step": 7179 }, { "epoch": 0.5054558254135868, "grad_norm": 1.9979774951934814, "learning_rate": 7.21703562871711e-06, "loss": 0.7432, "step": 7180 }, { "epoch": 0.5055262231608588, "grad_norm": 2.107221841812134, "learning_rate": 7.215440284446481e-06, "loss": 0.6958, "step": 7181 }, { "epoch": 0.505596620908131, "grad_norm": 1.423128366470337, "learning_rate": 7.213844928974916e-06, "loss": 0.6254, "step": 7182 }, { "epoch": 0.505667018655403, "grad_norm": 1.9390244483947754, "learning_rate": 7.212249562385361e-06, "loss": 0.6759, "step": 7183 }, { "epoch": 0.5057374164026751, "grad_norm": 2.2053797245025635, "learning_rate": 7.210654184760759e-06, "loss": 0.6848, "step": 7184 }, { "epoch": 0.5058078141499472, "grad_norm": 1.925341248512268, "learning_rate": 7.2090587961840545e-06, "loss": 0.6335, "step": 7185 }, { "epoch": 0.5058782118972193, "grad_norm": 1.6882789134979248, "learning_rate": 7.207463396738193e-06, "loss": 0.718, "step": 7186 }, { "epoch": 0.5059486096444914, "grad_norm": 1.6522361040115356, "learning_rate": 7.205867986506124e-06, "loss": 0.729, "step": 7187 }, { "epoch": 0.5060190073917634, "grad_norm": 1.8419979810714722, "learning_rate": 7.204272565570789e-06, "loss": 0.6755, "step": 7188 }, { "epoch": 0.5060894051390356, "grad_norm": 2.04264497756958, "learning_rate": 7.2026771340151415e-06, "loss": 0.7164, "step": 7189 }, { "epoch": 0.5061598028863077, "grad_norm": 1.740890383720398, "learning_rate": 7.201081691922123e-06, "loss": 0.7199, "step": 7190 }, { "epoch": 0.5062302006335797, "grad_norm": 2.401780128479004, "learning_rate": 7.199486239374685e-06, "loss": 0.6574, "step": 7191 }, { "epoch": 0.5063005983808518, "grad_norm": 1.7312732934951782, "learning_rate": 7.19789077645578e-06, "loss": 0.7695, "step": 7192 }, { "epoch": 0.5063709961281239, "grad_norm": 2.0891263484954834, "learning_rate": 7.196295303248351e-06, "loss": 0.6916, "step": 7193 }, { "epoch": 0.506441393875396, "grad_norm": 2.103720188140869, "learning_rate": 7.194699819835353e-06, "loss": 0.8183, "step": 7194 }, { "epoch": 0.5065117916226681, "grad_norm": 1.9978855848312378, "learning_rate": 7.1931043262997375e-06, "loss": 0.7222, "step": 7195 }, { "epoch": 0.5065821893699402, "grad_norm": 1.9714429378509521, "learning_rate": 7.191508822724451e-06, "loss": 0.6835, "step": 7196 }, { "epoch": 0.5066525871172123, "grad_norm": 3.737884759902954, "learning_rate": 7.189913309192446e-06, "loss": 0.7012, "step": 7197 }, { "epoch": 0.5067229848644843, "grad_norm": 2.326138496398926, "learning_rate": 7.188317785786678e-06, "loss": 0.6443, "step": 7198 }, { "epoch": 0.5067933826117564, "grad_norm": 2.2467241287231445, "learning_rate": 7.1867222525900965e-06, "loss": 0.6672, "step": 7199 }, { "epoch": 0.5068637803590285, "grad_norm": 1.7143731117248535, "learning_rate": 7.185126709685656e-06, "loss": 0.6272, "step": 7200 }, { "epoch": 0.5069341781063006, "grad_norm": 2.0546677112579346, "learning_rate": 7.183531157156312e-06, "loss": 0.715, "step": 7201 }, { "epoch": 0.5070045758535727, "grad_norm": 1.734505534172058, "learning_rate": 7.181935595085016e-06, "loss": 0.7118, "step": 7202 }, { "epoch": 0.5070749736008447, "grad_norm": 1.9087351560592651, "learning_rate": 7.1803400235547224e-06, "loss": 0.682, "step": 7203 }, { "epoch": 0.5071453713481169, "grad_norm": 1.8142451047897339, "learning_rate": 7.178744442648389e-06, "loss": 0.5935, "step": 7204 }, { "epoch": 0.5072157690953889, "grad_norm": 2.5142006874084473, "learning_rate": 7.17714885244897e-06, "loss": 0.6111, "step": 7205 }, { "epoch": 0.507286166842661, "grad_norm": 1.7994918823242188, "learning_rate": 7.175553253039421e-06, "loss": 0.6354, "step": 7206 }, { "epoch": 0.5073565645899332, "grad_norm": 1.820534586906433, "learning_rate": 7.1739576445027e-06, "loss": 0.6504, "step": 7207 }, { "epoch": 0.5074269623372052, "grad_norm": 1.9252947568893433, "learning_rate": 7.1723620269217635e-06, "loss": 0.6892, "step": 7208 }, { "epoch": 0.5074973600844773, "grad_norm": 1.690758466720581, "learning_rate": 7.170766400379568e-06, "loss": 0.6298, "step": 7209 }, { "epoch": 0.5075677578317493, "grad_norm": 1.7276288270950317, "learning_rate": 7.169170764959074e-06, "loss": 0.7075, "step": 7210 }, { "epoch": 0.5076381555790215, "grad_norm": 2.1175191402435303, "learning_rate": 7.167575120743239e-06, "loss": 0.7977, "step": 7211 }, { "epoch": 0.5077085533262936, "grad_norm": 2.422945022583008, "learning_rate": 7.165979467815021e-06, "loss": 0.6021, "step": 7212 }, { "epoch": 0.5077789510735656, "grad_norm": 1.8297063112258911, "learning_rate": 7.164383806257381e-06, "loss": 0.6913, "step": 7213 }, { "epoch": 0.5078493488208378, "grad_norm": 1.57808518409729, "learning_rate": 7.162788136153277e-06, "loss": 0.7092, "step": 7214 }, { "epoch": 0.5079197465681098, "grad_norm": 1.6981334686279297, "learning_rate": 7.161192457585669e-06, "loss": 0.6594, "step": 7215 }, { "epoch": 0.5079901443153819, "grad_norm": 1.7312153577804565, "learning_rate": 7.159596770637523e-06, "loss": 0.6934, "step": 7216 }, { "epoch": 0.5080605420626539, "grad_norm": 1.7815219163894653, "learning_rate": 7.158001075391794e-06, "loss": 0.6789, "step": 7217 }, { "epoch": 0.5081309398099261, "grad_norm": 1.9189003705978394, "learning_rate": 7.156405371931446e-06, "loss": 0.6263, "step": 7218 }, { "epoch": 0.5082013375571982, "grad_norm": 2.509727716445923, "learning_rate": 7.154809660339442e-06, "loss": 0.7017, "step": 7219 }, { "epoch": 0.5082717353044702, "grad_norm": 2.1062777042388916, "learning_rate": 7.153213940698742e-06, "loss": 0.5922, "step": 7220 }, { "epoch": 0.5083421330517424, "grad_norm": 1.9614536762237549, "learning_rate": 7.15161821309231e-06, "loss": 0.7441, "step": 7221 }, { "epoch": 0.5084125307990144, "grad_norm": 2.0660202503204346, "learning_rate": 7.150022477603112e-06, "loss": 0.8596, "step": 7222 }, { "epoch": 0.5084829285462865, "grad_norm": 2.2284786701202393, "learning_rate": 7.1484267343141084e-06, "loss": 0.6416, "step": 7223 }, { "epoch": 0.5085533262935586, "grad_norm": 1.5946940183639526, "learning_rate": 7.146830983308263e-06, "loss": 0.5728, "step": 7224 }, { "epoch": 0.5086237240408307, "grad_norm": 2.299398422241211, "learning_rate": 7.145235224668546e-06, "loss": 0.6643, "step": 7225 }, { "epoch": 0.5086941217881028, "grad_norm": 1.8377124071121216, "learning_rate": 7.143639458477915e-06, "loss": 0.6262, "step": 7226 }, { "epoch": 0.5087645195353748, "grad_norm": 1.8305447101593018, "learning_rate": 7.142043684819339e-06, "loss": 0.7026, "step": 7227 }, { "epoch": 0.508834917282647, "grad_norm": 1.5226151943206787, "learning_rate": 7.140447903775783e-06, "loss": 0.7186, "step": 7228 }, { "epoch": 0.5089053150299191, "grad_norm": 1.7863333225250244, "learning_rate": 7.138852115430214e-06, "loss": 0.7077, "step": 7229 }, { "epoch": 0.5089757127771911, "grad_norm": 1.9445303678512573, "learning_rate": 7.137256319865597e-06, "loss": 0.7002, "step": 7230 }, { "epoch": 0.5090461105244632, "grad_norm": 1.8642805814743042, "learning_rate": 7.135660517164901e-06, "loss": 0.6012, "step": 7231 }, { "epoch": 0.5091165082717353, "grad_norm": 2.1673085689544678, "learning_rate": 7.1340647074110905e-06, "loss": 0.679, "step": 7232 }, { "epoch": 0.5091869060190074, "grad_norm": 1.8513344526290894, "learning_rate": 7.132468890687133e-06, "loss": 0.6885, "step": 7233 }, { "epoch": 0.5092573037662795, "grad_norm": 3.6087520122528076, "learning_rate": 7.1308730670760005e-06, "loss": 0.7511, "step": 7234 }, { "epoch": 0.5093277015135516, "grad_norm": 1.783063530921936, "learning_rate": 7.129277236660658e-06, "loss": 0.6081, "step": 7235 }, { "epoch": 0.5093980992608237, "grad_norm": 1.844086766242981, "learning_rate": 7.127681399524074e-06, "loss": 0.5937, "step": 7236 }, { "epoch": 0.5094684970080957, "grad_norm": 1.8665753602981567, "learning_rate": 7.126085555749221e-06, "loss": 0.6858, "step": 7237 }, { "epoch": 0.5095388947553678, "grad_norm": 1.6276103258132935, "learning_rate": 7.124489705419063e-06, "loss": 0.7157, "step": 7238 }, { "epoch": 0.5096092925026399, "grad_norm": 1.967496633529663, "learning_rate": 7.122893848616572e-06, "loss": 0.8549, "step": 7239 }, { "epoch": 0.509679690249912, "grad_norm": 1.9417833089828491, "learning_rate": 7.121297985424722e-06, "loss": 0.6482, "step": 7240 }, { "epoch": 0.5097500879971841, "grad_norm": 1.8633122444152832, "learning_rate": 7.119702115926475e-06, "loss": 0.773, "step": 7241 }, { "epoch": 0.5098204857444562, "grad_norm": 1.5895823240280151, "learning_rate": 7.118106240204809e-06, "loss": 0.6198, "step": 7242 }, { "epoch": 0.5098908834917283, "grad_norm": 1.6082289218902588, "learning_rate": 7.1165103583426935e-06, "loss": 0.7093, "step": 7243 }, { "epoch": 0.5099612812390003, "grad_norm": 1.7809982299804688, "learning_rate": 7.114914470423098e-06, "loss": 0.6465, "step": 7244 }, { "epoch": 0.5100316789862724, "grad_norm": 1.7603678703308105, "learning_rate": 7.1133185765289956e-06, "loss": 0.7207, "step": 7245 }, { "epoch": 0.5101020767335446, "grad_norm": 1.9739665985107422, "learning_rate": 7.111722676743358e-06, "loss": 0.6414, "step": 7246 }, { "epoch": 0.5101724744808166, "grad_norm": 1.5993940830230713, "learning_rate": 7.110126771149157e-06, "loss": 0.5985, "step": 7247 }, { "epoch": 0.5102428722280887, "grad_norm": 2.09397292137146, "learning_rate": 7.108530859829365e-06, "loss": 0.7616, "step": 7248 }, { "epoch": 0.5103132699753608, "grad_norm": 1.5309827327728271, "learning_rate": 7.1069349428669564e-06, "loss": 0.675, "step": 7249 }, { "epoch": 0.5103836677226329, "grad_norm": 1.6824051141738892, "learning_rate": 7.1053390203449026e-06, "loss": 0.6884, "step": 7250 }, { "epoch": 0.510454065469905, "grad_norm": 1.809202790260315, "learning_rate": 7.103743092346176e-06, "loss": 0.6487, "step": 7251 }, { "epoch": 0.510524463217177, "grad_norm": 1.935417890548706, "learning_rate": 7.102147158953754e-06, "loss": 0.7213, "step": 7252 }, { "epoch": 0.5105948609644492, "grad_norm": 1.6642048358917236, "learning_rate": 7.100551220250609e-06, "loss": 0.8042, "step": 7253 }, { "epoch": 0.5106652587117212, "grad_norm": 1.8413852453231812, "learning_rate": 7.098955276319714e-06, "loss": 0.6426, "step": 7254 }, { "epoch": 0.5107356564589933, "grad_norm": 1.8260184526443481, "learning_rate": 7.097359327244046e-06, "loss": 0.7916, "step": 7255 }, { "epoch": 0.5108060542062653, "grad_norm": 1.6348304748535156, "learning_rate": 7.095763373106577e-06, "loss": 0.7005, "step": 7256 }, { "epoch": 0.5108764519535375, "grad_norm": 1.8281441926956177, "learning_rate": 7.094167413990283e-06, "loss": 0.6835, "step": 7257 }, { "epoch": 0.5109468497008096, "grad_norm": 2.9072799682617188, "learning_rate": 7.092571449978142e-06, "loss": 0.713, "step": 7258 }, { "epoch": 0.5110172474480816, "grad_norm": 1.8686411380767822, "learning_rate": 7.090975481153126e-06, "loss": 0.6331, "step": 7259 }, { "epoch": 0.5110876451953538, "grad_norm": 1.8354483842849731, "learning_rate": 7.089379507598212e-06, "loss": 0.7569, "step": 7260 }, { "epoch": 0.5111580429426258, "grad_norm": 1.5587434768676758, "learning_rate": 7.087783529396378e-06, "loss": 0.6427, "step": 7261 }, { "epoch": 0.5112284406898979, "grad_norm": 1.6978719234466553, "learning_rate": 7.086187546630598e-06, "loss": 0.768, "step": 7262 }, { "epoch": 0.5112988384371701, "grad_norm": 1.8822331428527832, "learning_rate": 7.0845915593838485e-06, "loss": 0.6839, "step": 7263 }, { "epoch": 0.5113692361844421, "grad_norm": 1.617148518562317, "learning_rate": 7.0829955677391086e-06, "loss": 0.6707, "step": 7264 }, { "epoch": 0.5114396339317142, "grad_norm": 1.7654285430908203, "learning_rate": 7.081399571779353e-06, "loss": 0.6298, "step": 7265 }, { "epoch": 0.5115100316789862, "grad_norm": 1.526501178741455, "learning_rate": 7.0798035715875595e-06, "loss": 0.6018, "step": 7266 }, { "epoch": 0.5115804294262584, "grad_norm": 1.8355048894882202, "learning_rate": 7.078207567246708e-06, "loss": 0.6465, "step": 7267 }, { "epoch": 0.5116508271735305, "grad_norm": 1.9714993238449097, "learning_rate": 7.076611558839773e-06, "loss": 0.7634, "step": 7268 }, { "epoch": 0.5117212249208025, "grad_norm": 1.691249966621399, "learning_rate": 7.0750155464497324e-06, "loss": 0.6965, "step": 7269 }, { "epoch": 0.5117916226680747, "grad_norm": 1.8791855573654175, "learning_rate": 7.073419530159568e-06, "loss": 0.6786, "step": 7270 }, { "epoch": 0.5118620204153467, "grad_norm": 1.818517804145813, "learning_rate": 7.071823510052255e-06, "loss": 0.7568, "step": 7271 }, { "epoch": 0.5119324181626188, "grad_norm": 1.9779372215270996, "learning_rate": 7.070227486210772e-06, "loss": 0.661, "step": 7272 }, { "epoch": 0.5120028159098908, "grad_norm": 1.652539849281311, "learning_rate": 7.068631458718099e-06, "loss": 0.6455, "step": 7273 }, { "epoch": 0.512073213657163, "grad_norm": 2.004509210586548, "learning_rate": 7.067035427657214e-06, "loss": 0.8217, "step": 7274 }, { "epoch": 0.5121436114044351, "grad_norm": 2.0231258869171143, "learning_rate": 7.065439393111096e-06, "loss": 0.6498, "step": 7275 }, { "epoch": 0.5122140091517071, "grad_norm": 1.6902931928634644, "learning_rate": 7.063843355162725e-06, "loss": 0.6542, "step": 7276 }, { "epoch": 0.5122844068989792, "grad_norm": 1.927322268486023, "learning_rate": 7.06224731389508e-06, "loss": 0.5698, "step": 7277 }, { "epoch": 0.5123548046462513, "grad_norm": 1.8542006015777588, "learning_rate": 7.060651269391141e-06, "loss": 0.7442, "step": 7278 }, { "epoch": 0.5124252023935234, "grad_norm": 2.0353829860687256, "learning_rate": 7.05905522173389e-06, "loss": 0.6151, "step": 7279 }, { "epoch": 0.5124956001407955, "grad_norm": 2.0359506607055664, "learning_rate": 7.057459171006301e-06, "loss": 0.7748, "step": 7280 }, { "epoch": 0.5125659978880676, "grad_norm": 1.906287431716919, "learning_rate": 7.055863117291358e-06, "loss": 0.6373, "step": 7281 }, { "epoch": 0.5126363956353397, "grad_norm": 1.8468117713928223, "learning_rate": 7.0542670606720435e-06, "loss": 0.6838, "step": 7282 }, { "epoch": 0.5127067933826117, "grad_norm": 1.752212405204773, "learning_rate": 7.052671001231331e-06, "loss": 0.7092, "step": 7283 }, { "epoch": 0.5127771911298838, "grad_norm": 1.7946126461029053, "learning_rate": 7.0510749390522085e-06, "loss": 0.6601, "step": 7284 }, { "epoch": 0.512847588877156, "grad_norm": 1.8366667032241821, "learning_rate": 7.049478874217655e-06, "loss": 0.7965, "step": 7285 }, { "epoch": 0.512917986624428, "grad_norm": 1.6423064470291138, "learning_rate": 7.047882806810647e-06, "loss": 0.7965, "step": 7286 }, { "epoch": 0.5129883843717001, "grad_norm": 1.730243444442749, "learning_rate": 7.0462867369141696e-06, "loss": 0.7173, "step": 7287 }, { "epoch": 0.5130587821189722, "grad_norm": 1.6485155820846558, "learning_rate": 7.044690664611203e-06, "loss": 0.6642, "step": 7288 }, { "epoch": 0.5131291798662443, "grad_norm": 1.6917983293533325, "learning_rate": 7.043094589984729e-06, "loss": 0.7616, "step": 7289 }, { "epoch": 0.5131995776135164, "grad_norm": 1.6673225164413452, "learning_rate": 7.041498513117726e-06, "loss": 0.6413, "step": 7290 }, { "epoch": 0.5132699753607884, "grad_norm": 1.6064687967300415, "learning_rate": 7.039902434093179e-06, "loss": 0.6721, "step": 7291 }, { "epoch": 0.5133403731080606, "grad_norm": 1.7484149932861328, "learning_rate": 7.038306352994068e-06, "loss": 0.7397, "step": 7292 }, { "epoch": 0.5134107708553326, "grad_norm": 1.402335524559021, "learning_rate": 7.036710269903375e-06, "loss": 0.5711, "step": 7293 }, { "epoch": 0.5134811686026047, "grad_norm": 1.6476999521255493, "learning_rate": 7.035114184904084e-06, "loss": 0.7219, "step": 7294 }, { "epoch": 0.5135515663498768, "grad_norm": 1.8207392692565918, "learning_rate": 7.033518098079173e-06, "loss": 0.6599, "step": 7295 }, { "epoch": 0.5136219640971489, "grad_norm": 1.704302430152893, "learning_rate": 7.031922009511624e-06, "loss": 0.7425, "step": 7296 }, { "epoch": 0.513692361844421, "grad_norm": 1.7492698431015015, "learning_rate": 7.030325919284424e-06, "loss": 0.6558, "step": 7297 }, { "epoch": 0.513762759591693, "grad_norm": 1.8424549102783203, "learning_rate": 7.028729827480551e-06, "loss": 0.6834, "step": 7298 }, { "epoch": 0.5138331573389652, "grad_norm": 1.8522762060165405, "learning_rate": 7.027133734182986e-06, "loss": 0.6569, "step": 7299 }, { "epoch": 0.5139035550862372, "grad_norm": 2.1853513717651367, "learning_rate": 7.025537639474717e-06, "loss": 0.7542, "step": 7300 }, { "epoch": 0.5139739528335093, "grad_norm": 1.7360774278640747, "learning_rate": 7.023941543438721e-06, "loss": 0.6347, "step": 7301 }, { "epoch": 0.5140443505807815, "grad_norm": 1.5875208377838135, "learning_rate": 7.022345446157983e-06, "loss": 0.7857, "step": 7302 }, { "epoch": 0.5141147483280535, "grad_norm": 1.9831496477127075, "learning_rate": 7.020749347715486e-06, "loss": 0.5826, "step": 7303 }, { "epoch": 0.5141851460753256, "grad_norm": 1.7789360284805298, "learning_rate": 7.01915324819421e-06, "loss": 0.6163, "step": 7304 }, { "epoch": 0.5142555438225976, "grad_norm": 1.6004186868667603, "learning_rate": 7.017557147677139e-06, "loss": 0.6778, "step": 7305 }, { "epoch": 0.5143259415698698, "grad_norm": 1.6345031261444092, "learning_rate": 7.015961046247258e-06, "loss": 0.7549, "step": 7306 }, { "epoch": 0.5143963393171419, "grad_norm": 2.5310299396514893, "learning_rate": 7.014364943987546e-06, "loss": 0.6744, "step": 7307 }, { "epoch": 0.5144667370644139, "grad_norm": 1.7102280855178833, "learning_rate": 7.012768840980988e-06, "loss": 0.6961, "step": 7308 }, { "epoch": 0.5145371348116861, "grad_norm": 1.6429734230041504, "learning_rate": 7.011172737310567e-06, "loss": 0.6519, "step": 7309 }, { "epoch": 0.5146075325589581, "grad_norm": 1.5758413076400757, "learning_rate": 7.009576633059265e-06, "loss": 0.746, "step": 7310 }, { "epoch": 0.5146779303062302, "grad_norm": 1.8911726474761963, "learning_rate": 7.007980528310064e-06, "loss": 0.6699, "step": 7311 }, { "epoch": 0.5147483280535022, "grad_norm": 1.9318925142288208, "learning_rate": 7.00638442314595e-06, "loss": 0.6744, "step": 7312 }, { "epoch": 0.5148187258007744, "grad_norm": 1.5567445755004883, "learning_rate": 7.004788317649902e-06, "loss": 0.6954, "step": 7313 }, { "epoch": 0.5148891235480465, "grad_norm": 1.8238369226455688, "learning_rate": 7.003192211904906e-06, "loss": 0.7105, "step": 7314 }, { "epoch": 0.5149595212953185, "grad_norm": 1.992449402809143, "learning_rate": 7.0015961059939455e-06, "loss": 0.6203, "step": 7315 }, { "epoch": 0.5150299190425907, "grad_norm": 1.6632627248764038, "learning_rate": 7e-06, "loss": 0.6056, "step": 7316 }, { "epoch": 0.5151003167898627, "grad_norm": 1.8979486227035522, "learning_rate": 6.998403894006055e-06, "loss": 0.7659, "step": 7317 }, { "epoch": 0.5151707145371348, "grad_norm": 2.084355115890503, "learning_rate": 6.996807788095094e-06, "loss": 0.7256, "step": 7318 }, { "epoch": 0.5152411122844069, "grad_norm": 1.966103434562683, "learning_rate": 6.995211682350097e-06, "loss": 0.8025, "step": 7319 }, { "epoch": 0.515311510031679, "grad_norm": 1.6654789447784424, "learning_rate": 6.993615576854051e-06, "loss": 0.7056, "step": 7320 }, { "epoch": 0.5153819077789511, "grad_norm": 2.279421329498291, "learning_rate": 6.992019471689935e-06, "loss": 0.6904, "step": 7321 }, { "epoch": 0.5154523055262231, "grad_norm": 1.6446737051010132, "learning_rate": 6.990423366940736e-06, "loss": 0.6165, "step": 7322 }, { "epoch": 0.5155227032734953, "grad_norm": 1.7655209302902222, "learning_rate": 6.988827262689433e-06, "loss": 0.7172, "step": 7323 }, { "epoch": 0.5155931010207674, "grad_norm": 1.8152681589126587, "learning_rate": 6.987231159019013e-06, "loss": 0.7333, "step": 7324 }, { "epoch": 0.5156634987680394, "grad_norm": 1.7429927587509155, "learning_rate": 6.985635056012453e-06, "loss": 0.6813, "step": 7325 }, { "epoch": 0.5157338965153115, "grad_norm": 1.5805948972702026, "learning_rate": 6.984038953752743e-06, "loss": 0.6533, "step": 7326 }, { "epoch": 0.5158042942625836, "grad_norm": 2.1669249534606934, "learning_rate": 6.9824428523228604e-06, "loss": 0.7525, "step": 7327 }, { "epoch": 0.5158746920098557, "grad_norm": 1.6395437717437744, "learning_rate": 6.980846751805791e-06, "loss": 0.7248, "step": 7328 }, { "epoch": 0.5159450897571277, "grad_norm": 1.4447835683822632, "learning_rate": 6.979250652284514e-06, "loss": 0.7433, "step": 7329 }, { "epoch": 0.5160154875043999, "grad_norm": 1.703834056854248, "learning_rate": 6.9776545538420176e-06, "loss": 0.782, "step": 7330 }, { "epoch": 0.516085885251672, "grad_norm": 1.8685457706451416, "learning_rate": 6.976058456561278e-06, "loss": 0.73, "step": 7331 }, { "epoch": 0.516156282998944, "grad_norm": 1.6005162000656128, "learning_rate": 6.974462360525283e-06, "loss": 0.8243, "step": 7332 }, { "epoch": 0.5162266807462161, "grad_norm": 1.957362413406372, "learning_rate": 6.972866265817012e-06, "loss": 0.7151, "step": 7333 }, { "epoch": 0.5162970784934882, "grad_norm": 2.100578546524048, "learning_rate": 6.971270172519451e-06, "loss": 0.6194, "step": 7334 }, { "epoch": 0.5163674762407603, "grad_norm": 1.6723664999008179, "learning_rate": 6.9696740807155755e-06, "loss": 0.7407, "step": 7335 }, { "epoch": 0.5164378739880324, "grad_norm": 1.6643610000610352, "learning_rate": 6.9680779904883766e-06, "loss": 0.542, "step": 7336 }, { "epoch": 0.5165082717353044, "grad_norm": 2.090097427368164, "learning_rate": 6.966481901920827e-06, "loss": 0.6134, "step": 7337 }, { "epoch": 0.5165786694825766, "grad_norm": 1.747299313545227, "learning_rate": 6.964885815095916e-06, "loss": 0.6479, "step": 7338 }, { "epoch": 0.5166490672298486, "grad_norm": 2.007554292678833, "learning_rate": 6.963289730096623e-06, "loss": 0.8183, "step": 7339 }, { "epoch": 0.5167194649771207, "grad_norm": 1.6758862733840942, "learning_rate": 6.961693647005933e-06, "loss": 0.8854, "step": 7340 }, { "epoch": 0.5167898627243929, "grad_norm": 1.662482738494873, "learning_rate": 6.960097565906819e-06, "loss": 0.6356, "step": 7341 }, { "epoch": 0.5168602604716649, "grad_norm": 2.334535837173462, "learning_rate": 6.9585014868822755e-06, "loss": 0.7918, "step": 7342 }, { "epoch": 0.516930658218937, "grad_norm": 2.5033926963806152, "learning_rate": 6.956905410015272e-06, "loss": 0.711, "step": 7343 }, { "epoch": 0.517001055966209, "grad_norm": 1.9171693325042725, "learning_rate": 6.955309335388799e-06, "loss": 0.7706, "step": 7344 }, { "epoch": 0.5170714537134812, "grad_norm": 1.5852034091949463, "learning_rate": 6.953713263085831e-06, "loss": 0.6673, "step": 7345 }, { "epoch": 0.5171418514607533, "grad_norm": 1.5376867055892944, "learning_rate": 6.952117193189355e-06, "loss": 0.5406, "step": 7346 }, { "epoch": 0.5172122492080253, "grad_norm": 1.836351990699768, "learning_rate": 6.950521125782346e-06, "loss": 0.7411, "step": 7347 }, { "epoch": 0.5172826469552975, "grad_norm": 1.7956129312515259, "learning_rate": 6.948925060947793e-06, "loss": 0.758, "step": 7348 }, { "epoch": 0.5173530447025695, "grad_norm": 1.520755648612976, "learning_rate": 6.947328998768669e-06, "loss": 0.7097, "step": 7349 }, { "epoch": 0.5174234424498416, "grad_norm": 2.1565463542938232, "learning_rate": 6.94573293932796e-06, "loss": 0.6968, "step": 7350 }, { "epoch": 0.5174938401971136, "grad_norm": 1.6890201568603516, "learning_rate": 6.944136882708643e-06, "loss": 0.713, "step": 7351 }, { "epoch": 0.5175642379443858, "grad_norm": 1.9673614501953125, "learning_rate": 6.942540828993702e-06, "loss": 0.7057, "step": 7352 }, { "epoch": 0.5176346356916579, "grad_norm": 1.8981658220291138, "learning_rate": 6.940944778266112e-06, "loss": 0.7321, "step": 7353 }, { "epoch": 0.5177050334389299, "grad_norm": 1.7329944372177124, "learning_rate": 6.939348730608861e-06, "loss": 0.6226, "step": 7354 }, { "epoch": 0.5177754311862021, "grad_norm": 1.8583217859268188, "learning_rate": 6.937752686104921e-06, "loss": 0.5958, "step": 7355 }, { "epoch": 0.5178458289334741, "grad_norm": 1.7113839387893677, "learning_rate": 6.9361566448372764e-06, "loss": 0.9241, "step": 7356 }, { "epoch": 0.5179162266807462, "grad_norm": 2.87446928024292, "learning_rate": 6.934560606888905e-06, "loss": 0.6057, "step": 7357 }, { "epoch": 0.5179866244280183, "grad_norm": 1.8411356210708618, "learning_rate": 6.9329645723427885e-06, "loss": 0.699, "step": 7358 }, { "epoch": 0.5180570221752904, "grad_norm": 1.8761615753173828, "learning_rate": 6.931368541281902e-06, "loss": 0.7268, "step": 7359 }, { "epoch": 0.5181274199225625, "grad_norm": 1.9560787677764893, "learning_rate": 6.9297725137892296e-06, "loss": 0.5878, "step": 7360 }, { "epoch": 0.5181978176698345, "grad_norm": 1.633263349533081, "learning_rate": 6.928176489947746e-06, "loss": 0.701, "step": 7361 }, { "epoch": 0.5182682154171067, "grad_norm": 1.976098895072937, "learning_rate": 6.926580469840434e-06, "loss": 0.7725, "step": 7362 }, { "epoch": 0.5183386131643788, "grad_norm": 1.7193260192871094, "learning_rate": 6.924984453550267e-06, "loss": 0.6686, "step": 7363 }, { "epoch": 0.5184090109116508, "grad_norm": 1.8226069211959839, "learning_rate": 6.923388441160229e-06, "loss": 0.5811, "step": 7364 }, { "epoch": 0.518479408658923, "grad_norm": 2.4959938526153564, "learning_rate": 6.921792432753293e-06, "loss": 0.8177, "step": 7365 }, { "epoch": 0.518549806406195, "grad_norm": 1.5298210382461548, "learning_rate": 6.920196428412441e-06, "loss": 0.6795, "step": 7366 }, { "epoch": 0.5186202041534671, "grad_norm": 1.754594326019287, "learning_rate": 6.918600428220647e-06, "loss": 0.6038, "step": 7367 }, { "epoch": 0.5186906019007391, "grad_norm": 1.7879782915115356, "learning_rate": 6.917004432260892e-06, "loss": 0.6986, "step": 7368 }, { "epoch": 0.5187609996480113, "grad_norm": 1.9729777574539185, "learning_rate": 6.915408440616152e-06, "loss": 0.7928, "step": 7369 }, { "epoch": 0.5188313973952834, "grad_norm": 2.0853335857391357, "learning_rate": 6.913812453369401e-06, "loss": 0.7744, "step": 7370 }, { "epoch": 0.5189017951425554, "grad_norm": 2.0244839191436768, "learning_rate": 6.912216470603622e-06, "loss": 0.6515, "step": 7371 }, { "epoch": 0.5189721928898275, "grad_norm": 1.6482385396957397, "learning_rate": 6.9106204924017864e-06, "loss": 0.6923, "step": 7372 }, { "epoch": 0.5190425906370996, "grad_norm": 1.9071153402328491, "learning_rate": 6.909024518846874e-06, "loss": 0.6517, "step": 7373 }, { "epoch": 0.5191129883843717, "grad_norm": 1.7285280227661133, "learning_rate": 6.907428550021859e-06, "loss": 0.584, "step": 7374 }, { "epoch": 0.5191833861316438, "grad_norm": 1.664016842842102, "learning_rate": 6.9058325860097174e-06, "loss": 0.6509, "step": 7375 }, { "epoch": 0.5192537838789159, "grad_norm": 1.863820195198059, "learning_rate": 6.904236626893422e-06, "loss": 0.5907, "step": 7376 }, { "epoch": 0.519324181626188, "grad_norm": 1.9558964967727661, "learning_rate": 6.902640672755955e-06, "loss": 0.641, "step": 7377 }, { "epoch": 0.51939457937346, "grad_norm": 2.214848279953003, "learning_rate": 6.9010447236802846e-06, "loss": 0.6435, "step": 7378 }, { "epoch": 0.5194649771207321, "grad_norm": 2.115663528442383, "learning_rate": 6.899448779749392e-06, "loss": 0.6746, "step": 7379 }, { "epoch": 0.5195353748680043, "grad_norm": 1.6854127645492554, "learning_rate": 6.8978528410462454e-06, "loss": 0.8011, "step": 7380 }, { "epoch": 0.5196057726152763, "grad_norm": 2.227003335952759, "learning_rate": 6.896256907653824e-06, "loss": 0.722, "step": 7381 }, { "epoch": 0.5196761703625484, "grad_norm": 1.6735475063323975, "learning_rate": 6.894660979655097e-06, "loss": 0.6599, "step": 7382 }, { "epoch": 0.5197465681098205, "grad_norm": 1.999244213104248, "learning_rate": 6.893065057133044e-06, "loss": 0.6718, "step": 7383 }, { "epoch": 0.5198169658570926, "grad_norm": 1.7448939085006714, "learning_rate": 6.891469140170635e-06, "loss": 0.7927, "step": 7384 }, { "epoch": 0.5198873636043646, "grad_norm": 2.0554862022399902, "learning_rate": 6.8898732288508435e-06, "loss": 0.7326, "step": 7385 }, { "epoch": 0.5199577613516367, "grad_norm": 2.0807201862335205, "learning_rate": 6.888277323256641e-06, "loss": 0.6573, "step": 7386 }, { "epoch": 0.5200281590989089, "grad_norm": 1.9615312814712524, "learning_rate": 6.886681423471005e-06, "loss": 0.7129, "step": 7387 }, { "epoch": 0.5200985568461809, "grad_norm": 2.084454298019409, "learning_rate": 6.8850855295769004e-06, "loss": 0.7203, "step": 7388 }, { "epoch": 0.520168954593453, "grad_norm": 1.6810779571533203, "learning_rate": 6.883489641657307e-06, "loss": 0.6785, "step": 7389 }, { "epoch": 0.520239352340725, "grad_norm": 1.5935664176940918, "learning_rate": 6.88189375979519e-06, "loss": 0.686, "step": 7390 }, { "epoch": 0.5203097500879972, "grad_norm": 1.8562833070755005, "learning_rate": 6.880297884073525e-06, "loss": 0.6922, "step": 7391 }, { "epoch": 0.5203801478352693, "grad_norm": 1.786625623703003, "learning_rate": 6.878702014575279e-06, "loss": 0.7997, "step": 7392 }, { "epoch": 0.5204505455825413, "grad_norm": 2.3165080547332764, "learning_rate": 6.87710615138343e-06, "loss": 0.5694, "step": 7393 }, { "epoch": 0.5205209433298135, "grad_norm": 1.744941234588623, "learning_rate": 6.875510294580938e-06, "loss": 0.6803, "step": 7394 }, { "epoch": 0.5205913410770855, "grad_norm": 1.7387410402297974, "learning_rate": 6.873914444250782e-06, "loss": 0.7212, "step": 7395 }, { "epoch": 0.5206617388243576, "grad_norm": 2.2411463260650635, "learning_rate": 6.872318600475927e-06, "loss": 0.6934, "step": 7396 }, { "epoch": 0.5207321365716298, "grad_norm": 2.1251368522644043, "learning_rate": 6.870722763339345e-06, "loss": 0.7357, "step": 7397 }, { "epoch": 0.5208025343189018, "grad_norm": 2.3309500217437744, "learning_rate": 6.869126932924e-06, "loss": 0.7433, "step": 7398 }, { "epoch": 0.5208729320661739, "grad_norm": 1.9608570337295532, "learning_rate": 6.8675311093128685e-06, "loss": 0.6985, "step": 7399 }, { "epoch": 0.5209433298134459, "grad_norm": 1.7868221998214722, "learning_rate": 6.865935292588911e-06, "loss": 0.7534, "step": 7400 }, { "epoch": 0.5210137275607181, "grad_norm": 1.71648108959198, "learning_rate": 6.864339482835102e-06, "loss": 0.6437, "step": 7401 }, { "epoch": 0.5210841253079902, "grad_norm": 1.7830232381820679, "learning_rate": 6.862743680134404e-06, "loss": 0.6954, "step": 7402 }, { "epoch": 0.5211545230552622, "grad_norm": 2.0513956546783447, "learning_rate": 6.861147884569789e-06, "loss": 0.7202, "step": 7403 }, { "epoch": 0.5212249208025344, "grad_norm": 1.784148931503296, "learning_rate": 6.859552096224217e-06, "loss": 0.6815, "step": 7404 }, { "epoch": 0.5212953185498064, "grad_norm": 1.6463571786880493, "learning_rate": 6.857956315180664e-06, "loss": 0.664, "step": 7405 }, { "epoch": 0.5213657162970785, "grad_norm": 1.7348599433898926, "learning_rate": 6.856360541522086e-06, "loss": 0.621, "step": 7406 }, { "epoch": 0.5214361140443505, "grad_norm": 1.861259937286377, "learning_rate": 6.8547647753314565e-06, "loss": 0.6013, "step": 7407 }, { "epoch": 0.5215065117916227, "grad_norm": 2.289461851119995, "learning_rate": 6.853169016691736e-06, "loss": 0.7143, "step": 7408 }, { "epoch": 0.5215769095388948, "grad_norm": 1.6642680168151855, "learning_rate": 6.851573265685893e-06, "loss": 0.6935, "step": 7409 }, { "epoch": 0.5216473072861668, "grad_norm": 1.8282966613769531, "learning_rate": 6.8499775223968875e-06, "loss": 0.5588, "step": 7410 }, { "epoch": 0.521717705033439, "grad_norm": 1.9467952251434326, "learning_rate": 6.848381786907691e-06, "loss": 0.5894, "step": 7411 }, { "epoch": 0.521788102780711, "grad_norm": 1.7577133178710938, "learning_rate": 6.846786059301259e-06, "loss": 0.7109, "step": 7412 }, { "epoch": 0.5218585005279831, "grad_norm": 1.8285603523254395, "learning_rate": 6.845190339660561e-06, "loss": 0.5249, "step": 7413 }, { "epoch": 0.5219288982752552, "grad_norm": 1.8494151830673218, "learning_rate": 6.843594628068555e-06, "loss": 0.6257, "step": 7414 }, { "epoch": 0.5219992960225273, "grad_norm": 1.7404510974884033, "learning_rate": 6.841998924608209e-06, "loss": 0.5015, "step": 7415 }, { "epoch": 0.5220696937697994, "grad_norm": 1.8912338018417358, "learning_rate": 6.840403229362478e-06, "loss": 0.7292, "step": 7416 }, { "epoch": 0.5221400915170714, "grad_norm": 1.9745309352874756, "learning_rate": 6.838807542414329e-06, "loss": 0.7764, "step": 7417 }, { "epoch": 0.5222104892643435, "grad_norm": 1.675135612487793, "learning_rate": 6.837211863846724e-06, "loss": 0.7521, "step": 7418 }, { "epoch": 0.5222808870116157, "grad_norm": 1.6488927602767944, "learning_rate": 6.83561619374262e-06, "loss": 0.7203, "step": 7419 }, { "epoch": 0.5223512847588877, "grad_norm": 1.6344131231307983, "learning_rate": 6.83402053218498e-06, "loss": 0.7466, "step": 7420 }, { "epoch": 0.5224216825061598, "grad_norm": 2.302687644958496, "learning_rate": 6.83242487925676e-06, "loss": 0.7717, "step": 7421 }, { "epoch": 0.5224920802534319, "grad_norm": 1.94539475440979, "learning_rate": 6.830829235040925e-06, "loss": 0.6877, "step": 7422 }, { "epoch": 0.522562478000704, "grad_norm": 1.80856192111969, "learning_rate": 6.829233599620431e-06, "loss": 0.705, "step": 7423 }, { "epoch": 0.522632875747976, "grad_norm": 2.1718575954437256, "learning_rate": 6.827637973078237e-06, "loss": 0.7395, "step": 7424 }, { "epoch": 0.5227032734952481, "grad_norm": 1.9884939193725586, "learning_rate": 6.826042355497301e-06, "loss": 0.5717, "step": 7425 }, { "epoch": 0.5227736712425203, "grad_norm": 2.6892025470733643, "learning_rate": 6.8244467469605805e-06, "loss": 0.7638, "step": 7426 }, { "epoch": 0.5228440689897923, "grad_norm": 1.5287035703659058, "learning_rate": 6.822851147551029e-06, "loss": 0.708, "step": 7427 }, { "epoch": 0.5229144667370644, "grad_norm": 1.864464282989502, "learning_rate": 6.821255557351611e-06, "loss": 0.6917, "step": 7428 }, { "epoch": 0.5229848644843365, "grad_norm": 1.7315733432769775, "learning_rate": 6.819659976445277e-06, "loss": 0.6081, "step": 7429 }, { "epoch": 0.5230552622316086, "grad_norm": 1.6570391654968262, "learning_rate": 6.818064404914985e-06, "loss": 0.5942, "step": 7430 }, { "epoch": 0.5231256599788807, "grad_norm": 1.7434214353561401, "learning_rate": 6.816468842843689e-06, "loss": 0.6653, "step": 7431 }, { "epoch": 0.5231960577261527, "grad_norm": 1.8913851976394653, "learning_rate": 6.814873290314345e-06, "loss": 0.6386, "step": 7432 }, { "epoch": 0.5232664554734249, "grad_norm": 1.843229055404663, "learning_rate": 6.8132777474099024e-06, "loss": 0.6786, "step": 7433 }, { "epoch": 0.5233368532206969, "grad_norm": 1.5635628700256348, "learning_rate": 6.811682214213322e-06, "loss": 0.7021, "step": 7434 }, { "epoch": 0.523407250967969, "grad_norm": 2.2642741203308105, "learning_rate": 6.810086690807554e-06, "loss": 0.626, "step": 7435 }, { "epoch": 0.5234776487152412, "grad_norm": 1.854142665863037, "learning_rate": 6.808491177275551e-06, "loss": 0.6568, "step": 7436 }, { "epoch": 0.5235480464625132, "grad_norm": 1.449426531791687, "learning_rate": 6.806895673700262e-06, "loss": 0.6265, "step": 7437 }, { "epoch": 0.5236184442097853, "grad_norm": 2.031947135925293, "learning_rate": 6.805300180164646e-06, "loss": 0.7769, "step": 7438 }, { "epoch": 0.5236888419570573, "grad_norm": 2.3577120304107666, "learning_rate": 6.8037046967516476e-06, "loss": 0.7657, "step": 7439 }, { "epoch": 0.5237592397043295, "grad_norm": 1.8724920749664307, "learning_rate": 6.802109223544222e-06, "loss": 0.6964, "step": 7440 }, { "epoch": 0.5238296374516016, "grad_norm": 1.7167026996612549, "learning_rate": 6.800513760625315e-06, "loss": 0.6688, "step": 7441 }, { "epoch": 0.5239000351988736, "grad_norm": 1.7597228288650513, "learning_rate": 6.798918308077879e-06, "loss": 0.7442, "step": 7442 }, { "epoch": 0.5239704329461458, "grad_norm": 1.779679298400879, "learning_rate": 6.79732286598486e-06, "loss": 0.706, "step": 7443 }, { "epoch": 0.5240408306934178, "grad_norm": 1.7420895099639893, "learning_rate": 6.795727434429213e-06, "loss": 0.6073, "step": 7444 }, { "epoch": 0.5241112284406899, "grad_norm": 1.743107795715332, "learning_rate": 6.794132013493878e-06, "loss": 0.5963, "step": 7445 }, { "epoch": 0.5241816261879619, "grad_norm": 1.5976300239562988, "learning_rate": 6.792536603261808e-06, "loss": 0.5475, "step": 7446 }, { "epoch": 0.5242520239352341, "grad_norm": 1.9796322584152222, "learning_rate": 6.790941203815947e-06, "loss": 0.7492, "step": 7447 }, { "epoch": 0.5243224216825062, "grad_norm": 1.6430132389068604, "learning_rate": 6.789345815239244e-06, "loss": 0.7434, "step": 7448 }, { "epoch": 0.5243928194297782, "grad_norm": 1.6057250499725342, "learning_rate": 6.7877504376146394e-06, "loss": 0.6753, "step": 7449 }, { "epoch": 0.5244632171770504, "grad_norm": 1.7355108261108398, "learning_rate": 6.786155071025086e-06, "loss": 0.7909, "step": 7450 }, { "epoch": 0.5245336149243224, "grad_norm": 1.713639259338379, "learning_rate": 6.784559715553519e-06, "loss": 0.828, "step": 7451 }, { "epoch": 0.5246040126715945, "grad_norm": 1.7967215776443481, "learning_rate": 6.782964371282893e-06, "loss": 0.588, "step": 7452 }, { "epoch": 0.5246744104188666, "grad_norm": 1.6326206922531128, "learning_rate": 6.781369038296142e-06, "loss": 0.6789, "step": 7453 }, { "epoch": 0.5247448081661387, "grad_norm": 1.8411390781402588, "learning_rate": 6.779773716676215e-06, "loss": 0.6032, "step": 7454 }, { "epoch": 0.5248152059134108, "grad_norm": 2.129791021347046, "learning_rate": 6.778178406506049e-06, "loss": 0.692, "step": 7455 }, { "epoch": 0.5248856036606828, "grad_norm": 1.6395708322525024, "learning_rate": 6.776583107868593e-06, "loss": 0.6148, "step": 7456 }, { "epoch": 0.524956001407955, "grad_norm": 1.6447468996047974, "learning_rate": 6.774987820846779e-06, "loss": 0.7037, "step": 7457 }, { "epoch": 0.5250263991552271, "grad_norm": 1.7879457473754883, "learning_rate": 6.773392545523555e-06, "loss": 0.7421, "step": 7458 }, { "epoch": 0.5250967969024991, "grad_norm": 1.7525808811187744, "learning_rate": 6.771797281981856e-06, "loss": 0.5544, "step": 7459 }, { "epoch": 0.5251671946497712, "grad_norm": 1.9077346324920654, "learning_rate": 6.770202030304625e-06, "loss": 0.6087, "step": 7460 }, { "epoch": 0.5252375923970433, "grad_norm": 1.7964978218078613, "learning_rate": 6.768606790574795e-06, "loss": 0.8133, "step": 7461 }, { "epoch": 0.5253079901443154, "grad_norm": 1.7891348600387573, "learning_rate": 6.767011562875312e-06, "loss": 0.6611, "step": 7462 }, { "epoch": 0.5253783878915874, "grad_norm": 1.7438241243362427, "learning_rate": 6.765416347289105e-06, "loss": 0.6291, "step": 7463 }, { "epoch": 0.5254487856388596, "grad_norm": 1.6934930086135864, "learning_rate": 6.7638211438991175e-06, "loss": 0.6447, "step": 7464 }, { "epoch": 0.5255191833861317, "grad_norm": 1.8121527433395386, "learning_rate": 6.7622259527882806e-06, "loss": 0.6393, "step": 7465 }, { "epoch": 0.5255895811334037, "grad_norm": 1.5217853784561157, "learning_rate": 6.760630774039534e-06, "loss": 0.6454, "step": 7466 }, { "epoch": 0.5256599788806758, "grad_norm": 1.8326139450073242, "learning_rate": 6.759035607735807e-06, "loss": 0.6629, "step": 7467 }, { "epoch": 0.5257303766279479, "grad_norm": 1.6447778940200806, "learning_rate": 6.757440453960038e-06, "loss": 0.6287, "step": 7468 }, { "epoch": 0.52580077437522, "grad_norm": 1.9868388175964355, "learning_rate": 6.755845312795161e-06, "loss": 0.6542, "step": 7469 }, { "epoch": 0.5258711721224921, "grad_norm": 1.624085783958435, "learning_rate": 6.7542501843241066e-06, "loss": 0.7322, "step": 7470 }, { "epoch": 0.5259415698697641, "grad_norm": 1.6465896368026733, "learning_rate": 6.752655068629808e-06, "loss": 0.7011, "step": 7471 }, { "epoch": 0.5260119676170363, "grad_norm": 1.7026617527008057, "learning_rate": 6.7510599657951945e-06, "loss": 0.6757, "step": 7472 }, { "epoch": 0.5260823653643083, "grad_norm": 2.9634289741516113, "learning_rate": 6.749464875903201e-06, "loss": 0.7107, "step": 7473 }, { "epoch": 0.5261527631115804, "grad_norm": 2.051602363586426, "learning_rate": 6.747869799036754e-06, "loss": 0.6903, "step": 7474 }, { "epoch": 0.5262231608588526, "grad_norm": 1.6552486419677734, "learning_rate": 6.746274735278787e-06, "loss": 0.6456, "step": 7475 }, { "epoch": 0.5262935586061246, "grad_norm": 1.5990136861801147, "learning_rate": 6.744679684712224e-06, "loss": 0.6042, "step": 7476 }, { "epoch": 0.5263639563533967, "grad_norm": 1.6663833856582642, "learning_rate": 6.743084647419997e-06, "loss": 0.6298, "step": 7477 }, { "epoch": 0.5264343541006687, "grad_norm": 1.869809627532959, "learning_rate": 6.741489623485029e-06, "loss": 0.6025, "step": 7478 }, { "epoch": 0.5265047518479409, "grad_norm": 1.6614131927490234, "learning_rate": 6.739894612990253e-06, "loss": 0.6095, "step": 7479 }, { "epoch": 0.5265751495952129, "grad_norm": 1.5472526550292969, "learning_rate": 6.738299616018589e-06, "loss": 0.594, "step": 7480 }, { "epoch": 0.526645547342485, "grad_norm": 2.301776170730591, "learning_rate": 6.736704632652967e-06, "loss": 0.6852, "step": 7481 }, { "epoch": 0.5267159450897572, "grad_norm": 2.2722675800323486, "learning_rate": 6.735109662976309e-06, "loss": 0.632, "step": 7482 }, { "epoch": 0.5267863428370292, "grad_norm": 2.0143613815307617, "learning_rate": 6.73351470707154e-06, "loss": 0.7023, "step": 7483 }, { "epoch": 0.5268567405843013, "grad_norm": 1.8775767087936401, "learning_rate": 6.731919765021579e-06, "loss": 0.6213, "step": 7484 }, { "epoch": 0.5269271383315733, "grad_norm": 1.9760935306549072, "learning_rate": 6.730324836909356e-06, "loss": 0.6647, "step": 7485 }, { "epoch": 0.5269975360788455, "grad_norm": 2.172408103942871, "learning_rate": 6.728729922817789e-06, "loss": 0.6821, "step": 7486 }, { "epoch": 0.5270679338261176, "grad_norm": 1.9519052505493164, "learning_rate": 6.727135022829798e-06, "loss": 0.6156, "step": 7487 }, { "epoch": 0.5271383315733896, "grad_norm": 1.5592550039291382, "learning_rate": 6.725540137028303e-06, "loss": 0.7087, "step": 7488 }, { "epoch": 0.5272087293206618, "grad_norm": 1.8131780624389648, "learning_rate": 6.723945265496229e-06, "loss": 0.6541, "step": 7489 }, { "epoch": 0.5272791270679338, "grad_norm": 2.005467176437378, "learning_rate": 6.722350408316485e-06, "loss": 0.6509, "step": 7490 }, { "epoch": 0.5273495248152059, "grad_norm": 1.8701461553573608, "learning_rate": 6.720755565571999e-06, "loss": 0.7365, "step": 7491 }, { "epoch": 0.527419922562478, "grad_norm": 1.6121982336044312, "learning_rate": 6.719160737345682e-06, "loss": 0.6683, "step": 7492 }, { "epoch": 0.5274903203097501, "grad_norm": 1.6670591831207275, "learning_rate": 6.717565923720455e-06, "loss": 0.6859, "step": 7493 }, { "epoch": 0.5275607180570222, "grad_norm": 1.845933198928833, "learning_rate": 6.7159711247792276e-06, "loss": 0.7321, "step": 7494 }, { "epoch": 0.5276311158042942, "grad_norm": 1.6344823837280273, "learning_rate": 6.714376340604923e-06, "loss": 0.769, "step": 7495 }, { "epoch": 0.5277015135515664, "grad_norm": 1.7420029640197754, "learning_rate": 6.712781571280446e-06, "loss": 0.5877, "step": 7496 }, { "epoch": 0.5277719112988385, "grad_norm": 1.7725459337234497, "learning_rate": 6.7111868168887195e-06, "loss": 0.7439, "step": 7497 }, { "epoch": 0.5278423090461105, "grad_norm": 1.8906582593917847, "learning_rate": 6.70959207751265e-06, "loss": 0.6795, "step": 7498 }, { "epoch": 0.5279127067933826, "grad_norm": 1.8604646921157837, "learning_rate": 6.707997353235153e-06, "loss": 0.6254, "step": 7499 }, { "epoch": 0.5279831045406547, "grad_norm": 1.7486445903778076, "learning_rate": 6.706402644139136e-06, "loss": 0.6175, "step": 7500 }, { "epoch": 0.5280535022879268, "grad_norm": 2.08443284034729, "learning_rate": 6.7048079503075145e-06, "loss": 0.753, "step": 7501 }, { "epoch": 0.5281239000351988, "grad_norm": 1.5325770378112793, "learning_rate": 6.703213271823191e-06, "loss": 0.73, "step": 7502 }, { "epoch": 0.528194297782471, "grad_norm": 1.7194849252700806, "learning_rate": 6.701618608769081e-06, "loss": 0.749, "step": 7503 }, { "epoch": 0.5282646955297431, "grad_norm": 1.7757371664047241, "learning_rate": 6.700023961228088e-06, "loss": 0.6504, "step": 7504 }, { "epoch": 0.5283350932770151, "grad_norm": 2.1713650226593018, "learning_rate": 6.698429329283123e-06, "loss": 0.8018, "step": 7505 }, { "epoch": 0.5284054910242872, "grad_norm": 2.1625423431396484, "learning_rate": 6.696834713017087e-06, "loss": 0.7458, "step": 7506 }, { "epoch": 0.5284758887715593, "grad_norm": 1.8513375520706177, "learning_rate": 6.695240112512894e-06, "loss": 0.7984, "step": 7507 }, { "epoch": 0.5285462865188314, "grad_norm": 1.632020354270935, "learning_rate": 6.6936455278534385e-06, "loss": 0.6246, "step": 7508 }, { "epoch": 0.5286166842661035, "grad_norm": 1.7794729471206665, "learning_rate": 6.692050959121633e-06, "loss": 0.6721, "step": 7509 }, { "epoch": 0.5286870820133756, "grad_norm": 1.7825604677200317, "learning_rate": 6.690456406400375e-06, "loss": 0.7461, "step": 7510 }, { "epoch": 0.5287574797606477, "grad_norm": 1.8926806449890137, "learning_rate": 6.68886186977257e-06, "loss": 0.7776, "step": 7511 }, { "epoch": 0.5288278775079197, "grad_norm": 1.8956408500671387, "learning_rate": 6.687267349321116e-06, "loss": 0.6919, "step": 7512 }, { "epoch": 0.5288982752551918, "grad_norm": 2.341432809829712, "learning_rate": 6.685672845128919e-06, "loss": 0.7111, "step": 7513 }, { "epoch": 0.528968673002464, "grad_norm": 1.8126460313796997, "learning_rate": 6.684078357278872e-06, "loss": 0.7536, "step": 7514 }, { "epoch": 0.529039070749736, "grad_norm": 2.3326852321624756, "learning_rate": 6.682483885853881e-06, "loss": 0.8129, "step": 7515 }, { "epoch": 0.5291094684970081, "grad_norm": 2.5285210609436035, "learning_rate": 6.680889430936838e-06, "loss": 0.7359, "step": 7516 }, { "epoch": 0.5291798662442802, "grad_norm": 1.6461563110351562, "learning_rate": 6.679294992610643e-06, "loss": 0.6089, "step": 7517 }, { "epoch": 0.5292502639915523, "grad_norm": 2.044753074645996, "learning_rate": 6.67770057095819e-06, "loss": 0.6962, "step": 7518 }, { "epoch": 0.5293206617388243, "grad_norm": 1.7113864421844482, "learning_rate": 6.676106166062377e-06, "loss": 0.6615, "step": 7519 }, { "epoch": 0.5293910594860964, "grad_norm": 1.8552175760269165, "learning_rate": 6.6745117780061e-06, "loss": 0.7467, "step": 7520 }, { "epoch": 0.5294614572333686, "grad_norm": 1.8117557764053345, "learning_rate": 6.672917406872248e-06, "loss": 0.8, "step": 7521 }, { "epoch": 0.5295318549806406, "grad_norm": 1.7208598852157593, "learning_rate": 6.671323052743719e-06, "loss": 0.6985, "step": 7522 }, { "epoch": 0.5296022527279127, "grad_norm": 1.65394926071167, "learning_rate": 6.669728715703398e-06, "loss": 0.6833, "step": 7523 }, { "epoch": 0.5296726504751847, "grad_norm": 1.974219799041748, "learning_rate": 6.6681343958341836e-06, "loss": 0.6186, "step": 7524 }, { "epoch": 0.5297430482224569, "grad_norm": 2.463388204574585, "learning_rate": 6.666540093218961e-06, "loss": 0.7838, "step": 7525 }, { "epoch": 0.529813445969729, "grad_norm": 1.912752389907837, "learning_rate": 6.664945807940623e-06, "loss": 0.6655, "step": 7526 }, { "epoch": 0.529883843717001, "grad_norm": 1.8175055980682373, "learning_rate": 6.663351540082053e-06, "loss": 0.668, "step": 7527 }, { "epoch": 0.5299542414642732, "grad_norm": 1.6138761043548584, "learning_rate": 6.661757289726144e-06, "loss": 0.7255, "step": 7528 }, { "epoch": 0.5300246392115452, "grad_norm": 1.6489248275756836, "learning_rate": 6.660163056955777e-06, "loss": 0.6884, "step": 7529 }, { "epoch": 0.5300950369588173, "grad_norm": 2.275033950805664, "learning_rate": 6.658568841853843e-06, "loss": 0.6585, "step": 7530 }, { "epoch": 0.5301654347060895, "grad_norm": 1.7176398038864136, "learning_rate": 6.656974644503222e-06, "loss": 0.6444, "step": 7531 }, { "epoch": 0.5302358324533615, "grad_norm": 1.9630380868911743, "learning_rate": 6.655380464986803e-06, "loss": 0.6126, "step": 7532 }, { "epoch": 0.5303062302006336, "grad_norm": 1.621010184288025, "learning_rate": 6.653786303387463e-06, "loss": 0.689, "step": 7533 }, { "epoch": 0.5303766279479056, "grad_norm": 1.748073697090149, "learning_rate": 6.652192159788088e-06, "loss": 0.6095, "step": 7534 }, { "epoch": 0.5304470256951778, "grad_norm": 1.9960527420043945, "learning_rate": 6.650598034271555e-06, "loss": 0.6615, "step": 7535 }, { "epoch": 0.5305174234424498, "grad_norm": 1.8324952125549316, "learning_rate": 6.6490039269207505e-06, "loss": 0.7185, "step": 7536 }, { "epoch": 0.5305878211897219, "grad_norm": 1.7429721355438232, "learning_rate": 6.6474098378185465e-06, "loss": 0.7199, "step": 7537 }, { "epoch": 0.530658218936994, "grad_norm": 1.896440029144287, "learning_rate": 6.645815767047827e-06, "loss": 0.5962, "step": 7538 }, { "epoch": 0.5307286166842661, "grad_norm": 1.9084726572036743, "learning_rate": 6.644221714691465e-06, "loss": 0.7683, "step": 7539 }, { "epoch": 0.5307990144315382, "grad_norm": 1.7117743492126465, "learning_rate": 6.64262768083234e-06, "loss": 0.7243, "step": 7540 }, { "epoch": 0.5308694121788102, "grad_norm": 1.6634089946746826, "learning_rate": 6.641033665553321e-06, "loss": 0.6265, "step": 7541 }, { "epoch": 0.5309398099260824, "grad_norm": 1.8038777112960815, "learning_rate": 6.639439668937292e-06, "loss": 0.6173, "step": 7542 }, { "epoch": 0.5310102076733545, "grad_norm": 1.5525712966918945, "learning_rate": 6.637845691067117e-06, "loss": 0.6789, "step": 7543 }, { "epoch": 0.5310806054206265, "grad_norm": 1.5650488138198853, "learning_rate": 6.636251732025676e-06, "loss": 0.6603, "step": 7544 }, { "epoch": 0.5311510031678986, "grad_norm": 1.987973928451538, "learning_rate": 6.634657791895833e-06, "loss": 0.7499, "step": 7545 }, { "epoch": 0.5312214009151707, "grad_norm": 1.885811686515808, "learning_rate": 6.6330638707604675e-06, "loss": 0.6866, "step": 7546 }, { "epoch": 0.5312917986624428, "grad_norm": 1.702982783317566, "learning_rate": 6.63146996870244e-06, "loss": 0.7379, "step": 7547 }, { "epoch": 0.5313621964097149, "grad_norm": 1.787111520767212, "learning_rate": 6.629876085804623e-06, "loss": 0.7197, "step": 7548 }, { "epoch": 0.531432594156987, "grad_norm": 1.7335540056228638, "learning_rate": 6.628282222149884e-06, "loss": 0.7013, "step": 7549 }, { "epoch": 0.5315029919042591, "grad_norm": 1.5618013143539429, "learning_rate": 6.62668837782109e-06, "loss": 0.6209, "step": 7550 }, { "epoch": 0.5315733896515311, "grad_norm": 1.9631468057632446, "learning_rate": 6.6250945529011025e-06, "loss": 0.6712, "step": 7551 }, { "epoch": 0.5316437873988032, "grad_norm": 2.008808135986328, "learning_rate": 6.623500747472793e-06, "loss": 0.6899, "step": 7552 }, { "epoch": 0.5317141851460754, "grad_norm": 1.6450614929199219, "learning_rate": 6.621906961619015e-06, "loss": 0.7631, "step": 7553 }, { "epoch": 0.5317845828933474, "grad_norm": 2.401175022125244, "learning_rate": 6.620313195422641e-06, "loss": 0.6669, "step": 7554 }, { "epoch": 0.5318549806406195, "grad_norm": 1.9532666206359863, "learning_rate": 6.618719448966526e-06, "loss": 0.7256, "step": 7555 }, { "epoch": 0.5319253783878916, "grad_norm": 1.6480385065078735, "learning_rate": 6.617125722333534e-06, "loss": 0.7787, "step": 7556 }, { "epoch": 0.5319957761351637, "grad_norm": 2.0433669090270996, "learning_rate": 6.615532015606518e-06, "loss": 0.6641, "step": 7557 }, { "epoch": 0.5320661738824357, "grad_norm": 1.7731455564498901, "learning_rate": 6.6139383288683464e-06, "loss": 0.6834, "step": 7558 }, { "epoch": 0.5321365716297078, "grad_norm": 1.781884789466858, "learning_rate": 6.6123446622018655e-06, "loss": 0.6478, "step": 7559 }, { "epoch": 0.53220696937698, "grad_norm": 1.7247004508972168, "learning_rate": 6.610751015689941e-06, "loss": 0.6018, "step": 7560 }, { "epoch": 0.532277367124252, "grad_norm": 1.8551161289215088, "learning_rate": 6.609157389415421e-06, "loss": 0.7282, "step": 7561 }, { "epoch": 0.5323477648715241, "grad_norm": 1.9824702739715576, "learning_rate": 6.607563783461164e-06, "loss": 0.6314, "step": 7562 }, { "epoch": 0.5324181626187962, "grad_norm": 1.6524405479431152, "learning_rate": 6.605970197910019e-06, "loss": 0.6948, "step": 7563 }, { "epoch": 0.5324885603660683, "grad_norm": 1.8479406833648682, "learning_rate": 6.604376632844845e-06, "loss": 0.8421, "step": 7564 }, { "epoch": 0.5325589581133404, "grad_norm": 1.6810022592544556, "learning_rate": 6.602783088348482e-06, "loss": 0.7294, "step": 7565 }, { "epoch": 0.5326293558606124, "grad_norm": 1.7090864181518555, "learning_rate": 6.60118956450379e-06, "loss": 0.6951, "step": 7566 }, { "epoch": 0.5326997536078846, "grad_norm": 1.7481186389923096, "learning_rate": 6.599596061393612e-06, "loss": 0.6516, "step": 7567 }, { "epoch": 0.5327701513551566, "grad_norm": 2.0140206813812256, "learning_rate": 6.5980025791007956e-06, "loss": 0.7058, "step": 7568 }, { "epoch": 0.5328405491024287, "grad_norm": 1.8149967193603516, "learning_rate": 6.596409117708193e-06, "loss": 0.6632, "step": 7569 }, { "epoch": 0.5329109468497009, "grad_norm": 1.7204270362854004, "learning_rate": 6.594815677298644e-06, "loss": 0.7462, "step": 7570 }, { "epoch": 0.5329813445969729, "grad_norm": 2.195477247238159, "learning_rate": 6.593222257954997e-06, "loss": 0.7379, "step": 7571 }, { "epoch": 0.533051742344245, "grad_norm": 1.9683197736740112, "learning_rate": 6.59162885976009e-06, "loss": 0.5701, "step": 7572 }, { "epoch": 0.533122140091517, "grad_norm": 2.1704883575439453, "learning_rate": 6.590035482796772e-06, "loss": 0.7362, "step": 7573 }, { "epoch": 0.5331925378387892, "grad_norm": 2.072948694229126, "learning_rate": 6.588442127147876e-06, "loss": 0.6935, "step": 7574 }, { "epoch": 0.5332629355860612, "grad_norm": 1.7833729982376099, "learning_rate": 6.586848792896251e-06, "loss": 0.654, "step": 7575 }, { "epoch": 0.5333333333333333, "grad_norm": 1.9999759197235107, "learning_rate": 6.585255480124728e-06, "loss": 0.732, "step": 7576 }, { "epoch": 0.5334037310806055, "grad_norm": 1.4602270126342773, "learning_rate": 6.5836621889161515e-06, "loss": 0.5872, "step": 7577 }, { "epoch": 0.5334741288278775, "grad_norm": 2.1560707092285156, "learning_rate": 6.582068919353352e-06, "loss": 0.6005, "step": 7578 }, { "epoch": 0.5335445265751496, "grad_norm": 1.6263302564620972, "learning_rate": 6.58047567151917e-06, "loss": 0.6699, "step": 7579 }, { "epoch": 0.5336149243224216, "grad_norm": 1.7706831693649292, "learning_rate": 6.5788824454964346e-06, "loss": 0.6237, "step": 7580 }, { "epoch": 0.5336853220696938, "grad_norm": 1.645206093788147, "learning_rate": 6.577289241367986e-06, "loss": 0.6065, "step": 7581 }, { "epoch": 0.5337557198169659, "grad_norm": 1.8330366611480713, "learning_rate": 6.5756960592166505e-06, "loss": 0.6084, "step": 7582 }, { "epoch": 0.5338261175642379, "grad_norm": 1.4619405269622803, "learning_rate": 6.5741028991252625e-06, "loss": 0.6582, "step": 7583 }, { "epoch": 0.5338965153115101, "grad_norm": 1.7559928894042969, "learning_rate": 6.572509761176649e-06, "loss": 0.7826, "step": 7584 }, { "epoch": 0.5339669130587821, "grad_norm": 1.8723212480545044, "learning_rate": 6.570916645453641e-06, "loss": 0.6883, "step": 7585 }, { "epoch": 0.5340373108060542, "grad_norm": 1.9045860767364502, "learning_rate": 6.569323552039063e-06, "loss": 0.774, "step": 7586 }, { "epoch": 0.5341077085533263, "grad_norm": 1.8993767499923706, "learning_rate": 6.567730481015747e-06, "loss": 0.7194, "step": 7587 }, { "epoch": 0.5341781063005984, "grad_norm": 1.8078961372375488, "learning_rate": 6.566137432466513e-06, "loss": 0.5774, "step": 7588 }, { "epoch": 0.5342485040478705, "grad_norm": 1.9497579336166382, "learning_rate": 6.5645444064741885e-06, "loss": 0.8288, "step": 7589 }, { "epoch": 0.5343189017951425, "grad_norm": 2.015528678894043, "learning_rate": 6.562951403121592e-06, "loss": 0.7018, "step": 7590 }, { "epoch": 0.5343892995424147, "grad_norm": 1.7684500217437744, "learning_rate": 6.561358422491551e-06, "loss": 0.6903, "step": 7591 }, { "epoch": 0.5344596972896867, "grad_norm": 1.9624093770980835, "learning_rate": 6.5597654646668796e-06, "loss": 0.6145, "step": 7592 }, { "epoch": 0.5345300950369588, "grad_norm": 1.8126200437545776, "learning_rate": 6.558172529730402e-06, "loss": 0.8254, "step": 7593 }, { "epoch": 0.5346004927842309, "grad_norm": 1.8028085231781006, "learning_rate": 6.5565796177649335e-06, "loss": 0.6722, "step": 7594 }, { "epoch": 0.534670890531503, "grad_norm": 1.5922553539276123, "learning_rate": 6.554986728853295e-06, "loss": 0.5974, "step": 7595 }, { "epoch": 0.5347412882787751, "grad_norm": 1.889975905418396, "learning_rate": 6.553393863078296e-06, "loss": 0.6103, "step": 7596 }, { "epoch": 0.5348116860260471, "grad_norm": 2.4559378623962402, "learning_rate": 6.551801020522759e-06, "loss": 0.7008, "step": 7597 }, { "epoch": 0.5348820837733193, "grad_norm": 1.8064526319503784, "learning_rate": 6.550208201269488e-06, "loss": 0.6323, "step": 7598 }, { "epoch": 0.5349524815205914, "grad_norm": 1.7212413549423218, "learning_rate": 6.548615405401303e-06, "loss": 0.6201, "step": 7599 }, { "epoch": 0.5350228792678634, "grad_norm": 1.7334377765655518, "learning_rate": 6.547022633001011e-06, "loss": 0.6379, "step": 7600 }, { "epoch": 0.5350932770151355, "grad_norm": 1.81745445728302, "learning_rate": 6.545429884151424e-06, "loss": 0.5975, "step": 7601 }, { "epoch": 0.5351636747624076, "grad_norm": 1.9506843090057373, "learning_rate": 6.543837158935345e-06, "loss": 0.7484, "step": 7602 }, { "epoch": 0.5352340725096797, "grad_norm": 1.9207278490066528, "learning_rate": 6.5422444574355905e-06, "loss": 0.7411, "step": 7603 }, { "epoch": 0.5353044702569518, "grad_norm": 1.642674446105957, "learning_rate": 6.540651779734956e-06, "loss": 0.6263, "step": 7604 }, { "epoch": 0.5353748680042238, "grad_norm": 1.889747977256775, "learning_rate": 6.5390591259162555e-06, "loss": 0.5822, "step": 7605 }, { "epoch": 0.535445265751496, "grad_norm": 1.8003062009811401, "learning_rate": 6.537466496062287e-06, "loss": 0.7749, "step": 7606 }, { "epoch": 0.535515663498768, "grad_norm": 1.9270669221878052, "learning_rate": 6.535873890255854e-06, "loss": 0.7477, "step": 7607 }, { "epoch": 0.5355860612460401, "grad_norm": 1.6326743364334106, "learning_rate": 6.534281308579757e-06, "loss": 0.6714, "step": 7608 }, { "epoch": 0.5356564589933123, "grad_norm": 1.8754991292953491, "learning_rate": 6.5326887511168e-06, "loss": 0.7455, "step": 7609 }, { "epoch": 0.5357268567405843, "grad_norm": 1.8203610181808472, "learning_rate": 6.531096217949774e-06, "loss": 0.6497, "step": 7610 }, { "epoch": 0.5357972544878564, "grad_norm": 1.651504397392273, "learning_rate": 6.529503709161483e-06, "loss": 0.7736, "step": 7611 }, { "epoch": 0.5358676522351284, "grad_norm": 1.7416820526123047, "learning_rate": 6.527911224834719e-06, "loss": 0.7641, "step": 7612 }, { "epoch": 0.5359380499824006, "grad_norm": 1.760651707649231, "learning_rate": 6.526318765052279e-06, "loss": 0.6639, "step": 7613 }, { "epoch": 0.5360084477296726, "grad_norm": 1.7100434303283691, "learning_rate": 6.5247263298969525e-06, "loss": 0.6942, "step": 7614 }, { "epoch": 0.5360788454769447, "grad_norm": 2.0602376461029053, "learning_rate": 6.523133919451539e-06, "loss": 0.7618, "step": 7615 }, { "epoch": 0.5361492432242169, "grad_norm": 1.5820198059082031, "learning_rate": 6.521541533798821e-06, "loss": 0.7584, "step": 7616 }, { "epoch": 0.5362196409714889, "grad_norm": 1.6755225658416748, "learning_rate": 6.519949173021594e-06, "loss": 0.6295, "step": 7617 }, { "epoch": 0.536290038718761, "grad_norm": 1.7883131504058838, "learning_rate": 6.5183568372026435e-06, "loss": 0.7669, "step": 7618 }, { "epoch": 0.536360436466033, "grad_norm": 1.7524387836456299, "learning_rate": 6.516764526424756e-06, "loss": 0.753, "step": 7619 }, { "epoch": 0.5364308342133052, "grad_norm": 1.6729588508605957, "learning_rate": 6.5151722407707195e-06, "loss": 0.7181, "step": 7620 }, { "epoch": 0.5365012319605773, "grad_norm": 2.098452091217041, "learning_rate": 6.513579980323319e-06, "loss": 0.6554, "step": 7621 }, { "epoch": 0.5365716297078493, "grad_norm": 1.6959097385406494, "learning_rate": 6.5119877451653345e-06, "loss": 0.6121, "step": 7622 }, { "epoch": 0.5366420274551215, "grad_norm": 1.6789363622665405, "learning_rate": 6.5103955353795495e-06, "loss": 0.6554, "step": 7623 }, { "epoch": 0.5367124252023935, "grad_norm": 1.6949821710586548, "learning_rate": 6.508803351048744e-06, "loss": 0.6116, "step": 7624 }, { "epoch": 0.5367828229496656, "grad_norm": 1.7603811025619507, "learning_rate": 6.507211192255695e-06, "loss": 0.7552, "step": 7625 }, { "epoch": 0.5368532206969377, "grad_norm": 1.5074735879898071, "learning_rate": 6.505619059083185e-06, "loss": 0.7647, "step": 7626 }, { "epoch": 0.5369236184442098, "grad_norm": 1.7914700508117676, "learning_rate": 6.504026951613987e-06, "loss": 0.7322, "step": 7627 }, { "epoch": 0.5369940161914819, "grad_norm": 1.8822458982467651, "learning_rate": 6.502434869930878e-06, "loss": 0.7405, "step": 7628 }, { "epoch": 0.5370644139387539, "grad_norm": 1.9195444583892822, "learning_rate": 6.500842814116629e-06, "loss": 0.7344, "step": 7629 }, { "epoch": 0.5371348116860261, "grad_norm": 1.7205439805984497, "learning_rate": 6.499250784254017e-06, "loss": 0.7425, "step": 7630 }, { "epoch": 0.5372052094332981, "grad_norm": 1.5717813968658447, "learning_rate": 6.497658780425806e-06, "loss": 0.6014, "step": 7631 }, { "epoch": 0.5372756071805702, "grad_norm": 1.8617757558822632, "learning_rate": 6.496066802714773e-06, "loss": 0.6897, "step": 7632 }, { "epoch": 0.5373460049278423, "grad_norm": 1.9209654331207275, "learning_rate": 6.4944748512036815e-06, "loss": 0.631, "step": 7633 }, { "epoch": 0.5374164026751144, "grad_norm": 1.6570935249328613, "learning_rate": 6.492882925975302e-06, "loss": 0.6882, "step": 7634 }, { "epoch": 0.5374868004223865, "grad_norm": 1.8075212240219116, "learning_rate": 6.491291027112398e-06, "loss": 0.8008, "step": 7635 }, { "epoch": 0.5375571981696585, "grad_norm": 1.7746261358261108, "learning_rate": 6.489699154697735e-06, "loss": 0.6159, "step": 7636 }, { "epoch": 0.5376275959169307, "grad_norm": 1.8936866521835327, "learning_rate": 6.488107308814072e-06, "loss": 0.6072, "step": 7637 }, { "epoch": 0.5376979936642028, "grad_norm": 1.9252159595489502, "learning_rate": 6.486515489544176e-06, "loss": 0.7596, "step": 7638 }, { "epoch": 0.5377683914114748, "grad_norm": 1.836943507194519, "learning_rate": 6.484923696970804e-06, "loss": 0.6519, "step": 7639 }, { "epoch": 0.5378387891587469, "grad_norm": 1.577463984489441, "learning_rate": 6.483331931176717e-06, "loss": 0.5807, "step": 7640 }, { "epoch": 0.537909186906019, "grad_norm": 2.2375128269195557, "learning_rate": 6.481740192244669e-06, "loss": 0.6841, "step": 7641 }, { "epoch": 0.5379795846532911, "grad_norm": 1.8305737972259521, "learning_rate": 6.48014848025742e-06, "loss": 0.6673, "step": 7642 }, { "epoch": 0.5380499824005632, "grad_norm": 1.5856820344924927, "learning_rate": 6.478556795297719e-06, "loss": 0.7657, "step": 7643 }, { "epoch": 0.5381203801478353, "grad_norm": 1.5195484161376953, "learning_rate": 6.4769651374483244e-06, "loss": 0.6363, "step": 7644 }, { "epoch": 0.5381907778951074, "grad_norm": 2.2556374073028564, "learning_rate": 6.475373506791985e-06, "loss": 0.6214, "step": 7645 }, { "epoch": 0.5382611756423794, "grad_norm": 1.662761926651001, "learning_rate": 6.473781903411455e-06, "loss": 0.549, "step": 7646 }, { "epoch": 0.5383315733896515, "grad_norm": 1.4402122497558594, "learning_rate": 6.472190327389477e-06, "loss": 0.6282, "step": 7647 }, { "epoch": 0.5384019711369237, "grad_norm": 1.8957995176315308, "learning_rate": 6.470598778808805e-06, "loss": 0.6763, "step": 7648 }, { "epoch": 0.5384723688841957, "grad_norm": 1.7002350091934204, "learning_rate": 6.469007257752178e-06, "loss": 0.7286, "step": 7649 }, { "epoch": 0.5385427666314678, "grad_norm": 1.6537771224975586, "learning_rate": 6.467415764302348e-06, "loss": 0.6924, "step": 7650 }, { "epoch": 0.5386131643787399, "grad_norm": 1.6342873573303223, "learning_rate": 6.465824298542053e-06, "loss": 0.758, "step": 7651 }, { "epoch": 0.538683562126012, "grad_norm": 1.4749640226364136, "learning_rate": 6.4642328605540375e-06, "loss": 0.6528, "step": 7652 }, { "epoch": 0.538753959873284, "grad_norm": 1.7774696350097656, "learning_rate": 6.462641450421038e-06, "loss": 0.6277, "step": 7653 }, { "epoch": 0.5388243576205561, "grad_norm": 1.6630568504333496, "learning_rate": 6.461050068225802e-06, "loss": 0.5561, "step": 7654 }, { "epoch": 0.5388947553678283, "grad_norm": 1.707047700881958, "learning_rate": 6.459458714051055e-06, "loss": 0.5947, "step": 7655 }, { "epoch": 0.5389651531151003, "grad_norm": 1.6108838319778442, "learning_rate": 6.457867387979543e-06, "loss": 0.7107, "step": 7656 }, { "epoch": 0.5390355508623724, "grad_norm": 1.7779872417449951, "learning_rate": 6.456276090093994e-06, "loss": 0.7612, "step": 7657 }, { "epoch": 0.5391059486096444, "grad_norm": 1.5084747076034546, "learning_rate": 6.454684820477145e-06, "loss": 0.6586, "step": 7658 }, { "epoch": 0.5391763463569166, "grad_norm": 1.7420681715011597, "learning_rate": 6.453093579211724e-06, "loss": 0.6269, "step": 7659 }, { "epoch": 0.5392467441041887, "grad_norm": 1.9786864519119263, "learning_rate": 6.451502366380467e-06, "loss": 0.7259, "step": 7660 }, { "epoch": 0.5393171418514607, "grad_norm": 1.6913539171218872, "learning_rate": 6.449911182066095e-06, "loss": 0.6144, "step": 7661 }, { "epoch": 0.5393875395987329, "grad_norm": 1.9882802963256836, "learning_rate": 6.448320026351341e-06, "loss": 0.6721, "step": 7662 }, { "epoch": 0.5394579373460049, "grad_norm": 1.7868109941482544, "learning_rate": 6.446728899318928e-06, "loss": 0.6476, "step": 7663 }, { "epoch": 0.539528335093277, "grad_norm": 1.9142075777053833, "learning_rate": 6.445137801051581e-06, "loss": 0.7497, "step": 7664 }, { "epoch": 0.5395987328405492, "grad_norm": 1.827823281288147, "learning_rate": 6.443546731632019e-06, "loss": 0.6297, "step": 7665 }, { "epoch": 0.5396691305878212, "grad_norm": 1.705425500869751, "learning_rate": 6.441955691142971e-06, "loss": 0.6178, "step": 7666 }, { "epoch": 0.5397395283350933, "grad_norm": 1.8430206775665283, "learning_rate": 6.440364679667148e-06, "loss": 0.7068, "step": 7667 }, { "epoch": 0.5398099260823653, "grad_norm": 2.0737600326538086, "learning_rate": 6.438773697287274e-06, "loss": 0.6741, "step": 7668 }, { "epoch": 0.5398803238296375, "grad_norm": 1.5770585536956787, "learning_rate": 6.4371827440860645e-06, "loss": 0.6761, "step": 7669 }, { "epoch": 0.5399507215769095, "grad_norm": 2.215792655944824, "learning_rate": 6.435591820146231e-06, "loss": 0.7524, "step": 7670 }, { "epoch": 0.5400211193241816, "grad_norm": 1.6495838165283203, "learning_rate": 6.434000925550494e-06, "loss": 0.7211, "step": 7671 }, { "epoch": 0.5400915170714538, "grad_norm": 1.7708029747009277, "learning_rate": 6.432410060381559e-06, "loss": 0.7513, "step": 7672 }, { "epoch": 0.5401619148187258, "grad_norm": 1.5538126230239868, "learning_rate": 6.43081922472214e-06, "loss": 0.6454, "step": 7673 }, { "epoch": 0.5402323125659979, "grad_norm": 1.7136077880859375, "learning_rate": 6.429228418654943e-06, "loss": 0.6372, "step": 7674 }, { "epoch": 0.5403027103132699, "grad_norm": 1.8492094278335571, "learning_rate": 6.427637642262681e-06, "loss": 0.8329, "step": 7675 }, { "epoch": 0.5403731080605421, "grad_norm": 1.995292067527771, "learning_rate": 6.426046895628052e-06, "loss": 0.7199, "step": 7676 }, { "epoch": 0.5404435058078142, "grad_norm": 2.002349615097046, "learning_rate": 6.4244561788337685e-06, "loss": 0.6631, "step": 7677 }, { "epoch": 0.5405139035550862, "grad_norm": 2.9734368324279785, "learning_rate": 6.422865491962527e-06, "loss": 0.671, "step": 7678 }, { "epoch": 0.5405843013023583, "grad_norm": 1.7497472763061523, "learning_rate": 6.421274835097034e-06, "loss": 0.652, "step": 7679 }, { "epoch": 0.5406546990496304, "grad_norm": 1.7477983236312866, "learning_rate": 6.419684208319984e-06, "loss": 0.6937, "step": 7680 }, { "epoch": 0.5407250967969025, "grad_norm": 1.5725513696670532, "learning_rate": 6.4180936117140795e-06, "loss": 0.6688, "step": 7681 }, { "epoch": 0.5407954945441746, "grad_norm": 1.7177162170410156, "learning_rate": 6.416503045362011e-06, "loss": 0.7293, "step": 7682 }, { "epoch": 0.5408658922914467, "grad_norm": 1.699470043182373, "learning_rate": 6.414912509346483e-06, "loss": 0.6472, "step": 7683 }, { "epoch": 0.5409362900387188, "grad_norm": 2.2943477630615234, "learning_rate": 6.41332200375018e-06, "loss": 0.6213, "step": 7684 }, { "epoch": 0.5410066877859908, "grad_norm": 1.5972942113876343, "learning_rate": 6.411731528655799e-06, "loss": 0.5708, "step": 7685 }, { "epoch": 0.541077085533263, "grad_norm": 2.069218873977661, "learning_rate": 6.410141084146028e-06, "loss": 0.617, "step": 7686 }, { "epoch": 0.541147483280535, "grad_norm": 1.800736904144287, "learning_rate": 6.408550670303557e-06, "loss": 0.6477, "step": 7687 }, { "epoch": 0.5412178810278071, "grad_norm": 1.792518138885498, "learning_rate": 6.40696028721107e-06, "loss": 0.7397, "step": 7688 }, { "epoch": 0.5412882787750792, "grad_norm": 1.5638318061828613, "learning_rate": 6.405369934951257e-06, "loss": 0.6252, "step": 7689 }, { "epoch": 0.5413586765223513, "grad_norm": 1.8888559341430664, "learning_rate": 6.403779613606799e-06, "loss": 0.6607, "step": 7690 }, { "epoch": 0.5414290742696234, "grad_norm": 1.807568073272705, "learning_rate": 6.402189323260379e-06, "loss": 0.6187, "step": 7691 }, { "epoch": 0.5414994720168954, "grad_norm": 2.291107416152954, "learning_rate": 6.400599063994677e-06, "loss": 0.815, "step": 7692 }, { "epoch": 0.5415698697641675, "grad_norm": 1.7056608200073242, "learning_rate": 6.399008835892374e-06, "loss": 0.7074, "step": 7693 }, { "epoch": 0.5416402675114397, "grad_norm": 1.6791787147521973, "learning_rate": 6.397418639036143e-06, "loss": 0.7018, "step": 7694 }, { "epoch": 0.5417106652587117, "grad_norm": 1.6861300468444824, "learning_rate": 6.395828473508665e-06, "loss": 0.7287, "step": 7695 }, { "epoch": 0.5417810630059838, "grad_norm": 2.121337413787842, "learning_rate": 6.394238339392611e-06, "loss": 0.6066, "step": 7696 }, { "epoch": 0.5418514607532559, "grad_norm": 1.8389884233474731, "learning_rate": 6.392648236770655e-06, "loss": 0.5979, "step": 7697 }, { "epoch": 0.541921858500528, "grad_norm": 2.3450424671173096, "learning_rate": 6.391058165725467e-06, "loss": 0.7965, "step": 7698 }, { "epoch": 0.5419922562478001, "grad_norm": 1.827224612236023, "learning_rate": 6.389468126339717e-06, "loss": 0.7261, "step": 7699 }, { "epoch": 0.5420626539950721, "grad_norm": 2.1186211109161377, "learning_rate": 6.3878781186960695e-06, "loss": 0.6262, "step": 7700 }, { "epoch": 0.5421330517423443, "grad_norm": 1.4815746545791626, "learning_rate": 6.3862881428771955e-06, "loss": 0.7919, "step": 7701 }, { "epoch": 0.5422034494896163, "grad_norm": 1.8806374073028564, "learning_rate": 6.384698198965755e-06, "loss": 0.6331, "step": 7702 }, { "epoch": 0.5422738472368884, "grad_norm": 1.735249638557434, "learning_rate": 6.383108287044415e-06, "loss": 0.7233, "step": 7703 }, { "epoch": 0.5423442449841606, "grad_norm": 2.352452516555786, "learning_rate": 6.381518407195831e-06, "loss": 0.6534, "step": 7704 }, { "epoch": 0.5424146427314326, "grad_norm": 1.8975284099578857, "learning_rate": 6.379928559502668e-06, "loss": 0.6257, "step": 7705 }, { "epoch": 0.5424850404787047, "grad_norm": 1.6682980060577393, "learning_rate": 6.378338744047577e-06, "loss": 0.6982, "step": 7706 }, { "epoch": 0.5425554382259767, "grad_norm": 1.9517183303833008, "learning_rate": 6.376748960913222e-06, "loss": 0.6555, "step": 7707 }, { "epoch": 0.5426258359732489, "grad_norm": 2.0254437923431396, "learning_rate": 6.37515921018225e-06, "loss": 0.6809, "step": 7708 }, { "epoch": 0.5426962337205209, "grad_norm": 1.860432744026184, "learning_rate": 6.373569491937318e-06, "loss": 0.6938, "step": 7709 }, { "epoch": 0.542766631467793, "grad_norm": 2.0693907737731934, "learning_rate": 6.371979806261073e-06, "loss": 0.7371, "step": 7710 }, { "epoch": 0.5428370292150652, "grad_norm": 1.7892760038375854, "learning_rate": 6.370390153236171e-06, "loss": 0.6424, "step": 7711 }, { "epoch": 0.5429074269623372, "grad_norm": 2.0827126502990723, "learning_rate": 6.36880053294525e-06, "loss": 0.6356, "step": 7712 }, { "epoch": 0.5429778247096093, "grad_norm": 1.7739441394805908, "learning_rate": 6.3672109454709656e-06, "loss": 0.635, "step": 7713 }, { "epoch": 0.5430482224568813, "grad_norm": 1.8319505453109741, "learning_rate": 6.365621390895954e-06, "loss": 0.596, "step": 7714 }, { "epoch": 0.5431186202041535, "grad_norm": 1.9109606742858887, "learning_rate": 6.364031869302863e-06, "loss": 0.6564, "step": 7715 }, { "epoch": 0.5431890179514256, "grad_norm": 2.364642381668091, "learning_rate": 6.362442380774329e-06, "loss": 0.5471, "step": 7716 }, { "epoch": 0.5432594156986976, "grad_norm": 1.5500270128250122, "learning_rate": 6.360852925392996e-06, "loss": 0.646, "step": 7717 }, { "epoch": 0.5433298134459698, "grad_norm": 1.952462077140808, "learning_rate": 6.359263503241495e-06, "loss": 0.6316, "step": 7718 }, { "epoch": 0.5434002111932418, "grad_norm": 1.7297394275665283, "learning_rate": 6.357674114402465e-06, "loss": 0.6861, "step": 7719 }, { "epoch": 0.5434706089405139, "grad_norm": 1.754349708557129, "learning_rate": 6.3560847589585414e-06, "loss": 0.7703, "step": 7720 }, { "epoch": 0.543541006687786, "grad_norm": 1.634427547454834, "learning_rate": 6.3544954369923515e-06, "loss": 0.5949, "step": 7721 }, { "epoch": 0.5436114044350581, "grad_norm": 1.9098705053329468, "learning_rate": 6.352906148586532e-06, "loss": 0.6878, "step": 7722 }, { "epoch": 0.5436818021823302, "grad_norm": 2.0321969985961914, "learning_rate": 6.3513168938237055e-06, "loss": 0.7008, "step": 7723 }, { "epoch": 0.5437521999296022, "grad_norm": 1.682110071182251, "learning_rate": 6.349727672786503e-06, "loss": 0.5884, "step": 7724 }, { "epoch": 0.5438225976768744, "grad_norm": 1.8006662130355835, "learning_rate": 6.348138485557547e-06, "loss": 0.7076, "step": 7725 }, { "epoch": 0.5438929954241464, "grad_norm": 1.7511736154556274, "learning_rate": 6.346549332219462e-06, "loss": 0.6913, "step": 7726 }, { "epoch": 0.5439633931714185, "grad_norm": 1.701819896697998, "learning_rate": 6.344960212854867e-06, "loss": 0.6998, "step": 7727 }, { "epoch": 0.5440337909186906, "grad_norm": 1.7978006601333618, "learning_rate": 6.3433711275463855e-06, "loss": 0.6307, "step": 7728 }, { "epoch": 0.5441041886659627, "grad_norm": 1.6168092489242554, "learning_rate": 6.341782076376633e-06, "loss": 0.7069, "step": 7729 }, { "epoch": 0.5441745864132348, "grad_norm": 1.6781070232391357, "learning_rate": 6.340193059428228e-06, "loss": 0.6785, "step": 7730 }, { "epoch": 0.5442449841605068, "grad_norm": 1.689842700958252, "learning_rate": 6.338604076783781e-06, "loss": 0.6174, "step": 7731 }, { "epoch": 0.544315381907779, "grad_norm": 1.880469560623169, "learning_rate": 6.3370151285259095e-06, "loss": 0.7214, "step": 7732 }, { "epoch": 0.5443857796550511, "grad_norm": 1.6030569076538086, "learning_rate": 6.3354262147372185e-06, "loss": 0.7172, "step": 7733 }, { "epoch": 0.5444561774023231, "grad_norm": 1.8071589469909668, "learning_rate": 6.333837335500324e-06, "loss": 0.6472, "step": 7734 }, { "epoch": 0.5445265751495952, "grad_norm": 1.875027060508728, "learning_rate": 6.3322484908978274e-06, "loss": 0.5985, "step": 7735 }, { "epoch": 0.5445969728968673, "grad_norm": 1.9853671789169312, "learning_rate": 6.330659681012339e-06, "loss": 0.7119, "step": 7736 }, { "epoch": 0.5446673706441394, "grad_norm": 1.5938758850097656, "learning_rate": 6.329070905926458e-06, "loss": 0.579, "step": 7737 }, { "epoch": 0.5447377683914115, "grad_norm": 1.9238793849945068, "learning_rate": 6.32748216572279e-06, "loss": 0.7336, "step": 7738 }, { "epoch": 0.5448081661386835, "grad_norm": 1.4869035482406616, "learning_rate": 6.32589346048393e-06, "loss": 0.8402, "step": 7739 }, { "epoch": 0.5448785638859557, "grad_norm": 1.7831963300704956, "learning_rate": 6.3243047902924826e-06, "loss": 0.561, "step": 7740 }, { "epoch": 0.5449489616332277, "grad_norm": 1.708897352218628, "learning_rate": 6.322716155231039e-06, "loss": 0.696, "step": 7741 }, { "epoch": 0.5450193593804998, "grad_norm": 1.911719799041748, "learning_rate": 6.321127555382197e-06, "loss": 0.678, "step": 7742 }, { "epoch": 0.5450897571277719, "grad_norm": 1.6607388257980347, "learning_rate": 6.319538990828548e-06, "loss": 0.7441, "step": 7743 }, { "epoch": 0.545160154875044, "grad_norm": 1.7611827850341797, "learning_rate": 6.317950461652684e-06, "loss": 0.7386, "step": 7744 }, { "epoch": 0.5452305526223161, "grad_norm": 1.5957032442092896, "learning_rate": 6.31636196793719e-06, "loss": 0.7512, "step": 7745 }, { "epoch": 0.5453009503695881, "grad_norm": 1.479157567024231, "learning_rate": 6.31477350976466e-06, "loss": 0.5717, "step": 7746 }, { "epoch": 0.5453713481168603, "grad_norm": 1.632075548171997, "learning_rate": 6.3131850872176745e-06, "loss": 0.7369, "step": 7747 }, { "epoch": 0.5454417458641323, "grad_norm": 1.8773629665374756, "learning_rate": 6.3115967003788195e-06, "loss": 0.7224, "step": 7748 }, { "epoch": 0.5455121436114044, "grad_norm": 1.9852845668792725, "learning_rate": 6.3100083493306735e-06, "loss": 0.8419, "step": 7749 }, { "epoch": 0.5455825413586766, "grad_norm": 1.5376378297805786, "learning_rate": 6.30842003415582e-06, "loss": 0.6409, "step": 7750 }, { "epoch": 0.5456529391059486, "grad_norm": 1.5901210308074951, "learning_rate": 6.306831754936833e-06, "loss": 0.6792, "step": 7751 }, { "epoch": 0.5457233368532207, "grad_norm": 1.3552205562591553, "learning_rate": 6.305243511756293e-06, "loss": 0.6229, "step": 7752 }, { "epoch": 0.5457937346004927, "grad_norm": 1.80763840675354, "learning_rate": 6.303655304696771e-06, "loss": 0.6877, "step": 7753 }, { "epoch": 0.5458641323477649, "grad_norm": 1.7056670188903809, "learning_rate": 6.302067133840842e-06, "loss": 0.6865, "step": 7754 }, { "epoch": 0.545934530095037, "grad_norm": 1.5706933736801147, "learning_rate": 6.3004789992710715e-06, "loss": 0.7855, "step": 7755 }, { "epoch": 0.546004927842309, "grad_norm": 1.794345498085022, "learning_rate": 6.298890901070036e-06, "loss": 0.6808, "step": 7756 }, { "epoch": 0.5460753255895812, "grad_norm": 1.8097459077835083, "learning_rate": 6.297302839320293e-06, "loss": 0.679, "step": 7757 }, { "epoch": 0.5461457233368532, "grad_norm": 1.6874432563781738, "learning_rate": 6.295714814104415e-06, "loss": 0.7084, "step": 7758 }, { "epoch": 0.5462161210841253, "grad_norm": 2.2061455249786377, "learning_rate": 6.2941268255049585e-06, "loss": 0.7477, "step": 7759 }, { "epoch": 0.5462865188313974, "grad_norm": 1.7836071252822876, "learning_rate": 6.292538873604491e-06, "loss": 0.689, "step": 7760 }, { "epoch": 0.5463569165786695, "grad_norm": 1.533530354499817, "learning_rate": 6.290950958485564e-06, "loss": 0.6504, "step": 7761 }, { "epoch": 0.5464273143259416, "grad_norm": 2.126983880996704, "learning_rate": 6.289363080230745e-06, "loss": 0.7033, "step": 7762 }, { "epoch": 0.5464977120732136, "grad_norm": 1.6140657663345337, "learning_rate": 6.287775238922577e-06, "loss": 0.6282, "step": 7763 }, { "epoch": 0.5465681098204858, "grad_norm": 2.0173208713531494, "learning_rate": 6.286187434643622e-06, "loss": 0.5791, "step": 7764 }, { "epoch": 0.5466385075677578, "grad_norm": 2.033362865447998, "learning_rate": 6.284599667476429e-06, "loss": 0.6204, "step": 7765 }, { "epoch": 0.5467089053150299, "grad_norm": 2.0439112186431885, "learning_rate": 6.283011937503548e-06, "loss": 0.6598, "step": 7766 }, { "epoch": 0.546779303062302, "grad_norm": 2.366356611251831, "learning_rate": 6.281424244807522e-06, "loss": 0.6643, "step": 7767 }, { "epoch": 0.5468497008095741, "grad_norm": 1.7889009714126587, "learning_rate": 6.2798365894709055e-06, "loss": 0.8412, "step": 7768 }, { "epoch": 0.5469200985568462, "grad_norm": 2.3389923572540283, "learning_rate": 6.2782489715762325e-06, "loss": 0.6406, "step": 7769 }, { "epoch": 0.5469904963041182, "grad_norm": 1.9505542516708374, "learning_rate": 6.27666139120605e-06, "loss": 0.6782, "step": 7770 }, { "epoch": 0.5470608940513904, "grad_norm": 1.8522621393203735, "learning_rate": 6.275073848442899e-06, "loss": 0.7187, "step": 7771 }, { "epoch": 0.5471312917986625, "grad_norm": 2.5237157344818115, "learning_rate": 6.273486343369312e-06, "loss": 0.69, "step": 7772 }, { "epoch": 0.5472016895459345, "grad_norm": 1.6449689865112305, "learning_rate": 6.271898876067831e-06, "loss": 0.5369, "step": 7773 }, { "epoch": 0.5472720872932066, "grad_norm": 1.5917623043060303, "learning_rate": 6.2703114466209846e-06, "loss": 0.6745, "step": 7774 }, { "epoch": 0.5473424850404787, "grad_norm": 1.810890555381775, "learning_rate": 6.26872405511131e-06, "loss": 0.6137, "step": 7775 }, { "epoch": 0.5474128827877508, "grad_norm": 1.991504430770874, "learning_rate": 6.2671367016213306e-06, "loss": 0.633, "step": 7776 }, { "epoch": 0.5474832805350229, "grad_norm": 1.7536638975143433, "learning_rate": 6.26554938623358e-06, "loss": 0.6377, "step": 7777 }, { "epoch": 0.547553678282295, "grad_norm": 1.6204469203948975, "learning_rate": 6.263962109030579e-06, "loss": 0.6295, "step": 7778 }, { "epoch": 0.5476240760295671, "grad_norm": 6.28739070892334, "learning_rate": 6.2623748700948584e-06, "loss": 0.6484, "step": 7779 }, { "epoch": 0.5476944737768391, "grad_norm": 1.763731837272644, "learning_rate": 6.260787669508934e-06, "loss": 0.6419, "step": 7780 }, { "epoch": 0.5477648715241112, "grad_norm": 3.647878408432007, "learning_rate": 6.25920050735533e-06, "loss": 0.6871, "step": 7781 }, { "epoch": 0.5478352692713833, "grad_norm": 1.8419873714447021, "learning_rate": 6.257613383716561e-06, "loss": 0.7531, "step": 7782 }, { "epoch": 0.5479056670186554, "grad_norm": 2.0884478092193604, "learning_rate": 6.256026298675146e-06, "loss": 0.743, "step": 7783 }, { "epoch": 0.5479760647659275, "grad_norm": 1.6600134372711182, "learning_rate": 6.254439252313594e-06, "loss": 0.7121, "step": 7784 }, { "epoch": 0.5480464625131996, "grad_norm": 1.8200267553329468, "learning_rate": 6.252852244714424e-06, "loss": 0.6272, "step": 7785 }, { "epoch": 0.5481168602604717, "grad_norm": 1.8633325099945068, "learning_rate": 6.251265275960141e-06, "loss": 0.6545, "step": 7786 }, { "epoch": 0.5481872580077437, "grad_norm": 2.0975241661071777, "learning_rate": 6.249678346133256e-06, "loss": 0.7797, "step": 7787 }, { "epoch": 0.5482576557550158, "grad_norm": 1.8973156213760376, "learning_rate": 6.2480914553162715e-06, "loss": 0.6801, "step": 7788 }, { "epoch": 0.548328053502288, "grad_norm": 1.8868952989578247, "learning_rate": 6.246504603591694e-06, "loss": 0.745, "step": 7789 }, { "epoch": 0.54839845124956, "grad_norm": 1.9569071531295776, "learning_rate": 6.244917791042022e-06, "loss": 0.7399, "step": 7790 }, { "epoch": 0.5484688489968321, "grad_norm": 2.0065667629241943, "learning_rate": 6.243331017749762e-06, "loss": 0.773, "step": 7791 }, { "epoch": 0.5485392467441041, "grad_norm": 1.9541308879852295, "learning_rate": 6.241744283797405e-06, "loss": 0.6835, "step": 7792 }, { "epoch": 0.5486096444913763, "grad_norm": 2.238441228866577, "learning_rate": 6.240157589267452e-06, "loss": 0.6994, "step": 7793 }, { "epoch": 0.5486800422386484, "grad_norm": 1.6678476333618164, "learning_rate": 6.238570934242392e-06, "loss": 0.6664, "step": 7794 }, { "epoch": 0.5487504399859204, "grad_norm": 1.9458122253417969, "learning_rate": 6.236984318804719e-06, "loss": 0.7266, "step": 7795 }, { "epoch": 0.5488208377331926, "grad_norm": 1.595186471939087, "learning_rate": 6.235397743036921e-06, "loss": 0.7331, "step": 7796 }, { "epoch": 0.5488912354804646, "grad_norm": 1.5885337591171265, "learning_rate": 6.233811207021489e-06, "loss": 0.6458, "step": 7797 }, { "epoch": 0.5489616332277367, "grad_norm": 1.6301014423370361, "learning_rate": 6.232224710840905e-06, "loss": 0.702, "step": 7798 }, { "epoch": 0.5490320309750087, "grad_norm": 1.7431066036224365, "learning_rate": 6.2306382545776555e-06, "loss": 0.562, "step": 7799 }, { "epoch": 0.5491024287222809, "grad_norm": 1.7357741594314575, "learning_rate": 6.229051838314218e-06, "loss": 0.6332, "step": 7800 }, { "epoch": 0.549172826469553, "grad_norm": 1.5846047401428223, "learning_rate": 6.227465462133075e-06, "loss": 0.578, "step": 7801 }, { "epoch": 0.549243224216825, "grad_norm": 1.6287704706192017, "learning_rate": 6.225879126116699e-06, "loss": 0.6602, "step": 7802 }, { "epoch": 0.5493136219640972, "grad_norm": 2.342620849609375, "learning_rate": 6.224292830347572e-06, "loss": 0.6662, "step": 7803 }, { "epoch": 0.5493840197113692, "grad_norm": 1.847562313079834, "learning_rate": 6.222706574908162e-06, "loss": 0.8395, "step": 7804 }, { "epoch": 0.5494544174586413, "grad_norm": 1.5138769149780273, "learning_rate": 6.221120359880942e-06, "loss": 0.7586, "step": 7805 }, { "epoch": 0.5495248152059135, "grad_norm": 1.6749473810195923, "learning_rate": 6.219534185348379e-06, "loss": 0.5534, "step": 7806 }, { "epoch": 0.5495952129531855, "grad_norm": 1.8313485383987427, "learning_rate": 6.21794805139294e-06, "loss": 0.6427, "step": 7807 }, { "epoch": 0.5496656107004576, "grad_norm": 1.603509545326233, "learning_rate": 6.216361958097089e-06, "loss": 0.5611, "step": 7808 }, { "epoch": 0.5497360084477296, "grad_norm": 1.8518643379211426, "learning_rate": 6.214775905543292e-06, "loss": 0.717, "step": 7809 }, { "epoch": 0.5498064061950018, "grad_norm": 1.5756397247314453, "learning_rate": 6.213189893814006e-06, "loss": 0.5757, "step": 7810 }, { "epoch": 0.5498768039422739, "grad_norm": 1.812091588973999, "learning_rate": 6.2116039229916905e-06, "loss": 0.6676, "step": 7811 }, { "epoch": 0.5499472016895459, "grad_norm": 1.6875733137130737, "learning_rate": 6.210017993158799e-06, "loss": 0.7729, "step": 7812 }, { "epoch": 0.550017599436818, "grad_norm": 1.8549760580062866, "learning_rate": 6.208432104397791e-06, "loss": 0.6248, "step": 7813 }, { "epoch": 0.5500879971840901, "grad_norm": 1.8881546258926392, "learning_rate": 6.2068462567911115e-06, "loss": 0.7164, "step": 7814 }, { "epoch": 0.5501583949313622, "grad_norm": 1.8984192609786987, "learning_rate": 6.205260450421216e-06, "loss": 0.6638, "step": 7815 }, { "epoch": 0.5502287926786343, "grad_norm": 1.602730631828308, "learning_rate": 6.203674685370547e-06, "loss": 0.5983, "step": 7816 }, { "epoch": 0.5502991904259064, "grad_norm": 1.7371748685836792, "learning_rate": 6.202088961721555e-06, "loss": 0.6315, "step": 7817 }, { "epoch": 0.5503695881731785, "grad_norm": 1.7810018062591553, "learning_rate": 6.200503279556677e-06, "loss": 0.6932, "step": 7818 }, { "epoch": 0.5504399859204505, "grad_norm": 1.712323784828186, "learning_rate": 6.1989176389583575e-06, "loss": 0.6673, "step": 7819 }, { "epoch": 0.5505103836677226, "grad_norm": 1.644753098487854, "learning_rate": 6.19733204000904e-06, "loss": 0.7971, "step": 7820 }, { "epoch": 0.5505807814149947, "grad_norm": 1.863103985786438, "learning_rate": 6.195746482791153e-06, "loss": 0.7579, "step": 7821 }, { "epoch": 0.5506511791622668, "grad_norm": 1.4730033874511719, "learning_rate": 6.194160967387137e-06, "loss": 0.6757, "step": 7822 }, { "epoch": 0.5507215769095389, "grad_norm": 1.634902000427246, "learning_rate": 6.192575493879418e-06, "loss": 0.7041, "step": 7823 }, { "epoch": 0.550791974656811, "grad_norm": 1.6455365419387817, "learning_rate": 6.190990062350435e-06, "loss": 0.6545, "step": 7824 }, { "epoch": 0.5508623724040831, "grad_norm": 1.6908098459243774, "learning_rate": 6.189404672882609e-06, "loss": 0.686, "step": 7825 }, { "epoch": 0.5509327701513551, "grad_norm": 1.853636384010315, "learning_rate": 6.18781932555837e-06, "loss": 0.5737, "step": 7826 }, { "epoch": 0.5510031678986272, "grad_norm": 1.9686968326568604, "learning_rate": 6.186234020460138e-06, "loss": 0.7403, "step": 7827 }, { "epoch": 0.5510735656458994, "grad_norm": 1.9566203355789185, "learning_rate": 6.184648757670338e-06, "loss": 0.6118, "step": 7828 }, { "epoch": 0.5511439633931714, "grad_norm": 1.5814067125320435, "learning_rate": 6.183063537271384e-06, "loss": 0.6354, "step": 7829 }, { "epoch": 0.5512143611404435, "grad_norm": 1.9488803148269653, "learning_rate": 6.1814783593457e-06, "loss": 0.8212, "step": 7830 }, { "epoch": 0.5512847588877156, "grad_norm": 1.7936046123504639, "learning_rate": 6.179893223975697e-06, "loss": 0.6823, "step": 7831 }, { "epoch": 0.5513551566349877, "grad_norm": 1.7793817520141602, "learning_rate": 6.178308131243788e-06, "loss": 0.7867, "step": 7832 }, { "epoch": 0.5514255543822598, "grad_norm": 1.6854156255722046, "learning_rate": 6.176723081232382e-06, "loss": 0.6536, "step": 7833 }, { "epoch": 0.5514959521295318, "grad_norm": 1.6729825735092163, "learning_rate": 6.17513807402389e-06, "loss": 0.7352, "step": 7834 }, { "epoch": 0.551566349876804, "grad_norm": 1.5397371053695679, "learning_rate": 6.173553109700715e-06, "loss": 0.7051, "step": 7835 }, { "epoch": 0.551636747624076, "grad_norm": 2.3712105751037598, "learning_rate": 6.171968188345264e-06, "loss": 0.8233, "step": 7836 }, { "epoch": 0.5517071453713481, "grad_norm": 1.6892001628875732, "learning_rate": 6.170383310039936e-06, "loss": 0.6877, "step": 7837 }, { "epoch": 0.5517775431186202, "grad_norm": 1.913009524345398, "learning_rate": 6.168798474867132e-06, "loss": 0.763, "step": 7838 }, { "epoch": 0.5518479408658923, "grad_norm": 1.5469714403152466, "learning_rate": 6.167213682909246e-06, "loss": 0.7208, "step": 7839 }, { "epoch": 0.5519183386131644, "grad_norm": 1.9929800033569336, "learning_rate": 6.165628934248678e-06, "loss": 0.6664, "step": 7840 }, { "epoch": 0.5519887363604364, "grad_norm": 1.8267264366149902, "learning_rate": 6.164044228967813e-06, "loss": 0.715, "step": 7841 }, { "epoch": 0.5520591341077086, "grad_norm": 1.7379592657089233, "learning_rate": 6.162459567149049e-06, "loss": 0.8288, "step": 7842 }, { "epoch": 0.5521295318549806, "grad_norm": 1.8503873348236084, "learning_rate": 6.160874948874769e-06, "loss": 0.6294, "step": 7843 }, { "epoch": 0.5521999296022527, "grad_norm": 1.7426378726959229, "learning_rate": 6.159290374227363e-06, "loss": 0.6546, "step": 7844 }, { "epoch": 0.5522703273495249, "grad_norm": 1.596944808959961, "learning_rate": 6.157705843289208e-06, "loss": 0.6646, "step": 7845 }, { "epoch": 0.5523407250967969, "grad_norm": 1.6603950262069702, "learning_rate": 6.156121356142693e-06, "loss": 0.6061, "step": 7846 }, { "epoch": 0.552411122844069, "grad_norm": 1.8152450323104858, "learning_rate": 6.154536912870188e-06, "loss": 0.6886, "step": 7847 }, { "epoch": 0.552481520591341, "grad_norm": 1.810212254524231, "learning_rate": 6.152952513554079e-06, "loss": 0.7911, "step": 7848 }, { "epoch": 0.5525519183386132, "grad_norm": 1.7930800914764404, "learning_rate": 6.151368158276733e-06, "loss": 0.6087, "step": 7849 }, { "epoch": 0.5526223160858853, "grad_norm": 1.7702215909957886, "learning_rate": 6.149783847120528e-06, "loss": 0.745, "step": 7850 }, { "epoch": 0.5526927138331573, "grad_norm": 1.8680458068847656, "learning_rate": 6.148199580167828e-06, "loss": 0.6226, "step": 7851 }, { "epoch": 0.5527631115804295, "grad_norm": 1.884903073310852, "learning_rate": 6.146615357501007e-06, "loss": 0.722, "step": 7852 }, { "epoch": 0.5528335093277015, "grad_norm": 1.7139201164245605, "learning_rate": 6.145031179202421e-06, "loss": 0.7422, "step": 7853 }, { "epoch": 0.5529039070749736, "grad_norm": 1.613330364227295, "learning_rate": 6.143447045354442e-06, "loss": 0.6888, "step": 7854 }, { "epoch": 0.5529743048222457, "grad_norm": 1.9915255308151245, "learning_rate": 6.1418629560394255e-06, "loss": 0.7366, "step": 7855 }, { "epoch": 0.5530447025695178, "grad_norm": 2.0132555961608887, "learning_rate": 6.140278911339734e-06, "loss": 0.7329, "step": 7856 }, { "epoch": 0.5531151003167899, "grad_norm": 1.7994076013565063, "learning_rate": 6.138694911337716e-06, "loss": 0.7698, "step": 7857 }, { "epoch": 0.5531854980640619, "grad_norm": 1.7677419185638428, "learning_rate": 6.137110956115734e-06, "loss": 0.6387, "step": 7858 }, { "epoch": 0.553255895811334, "grad_norm": 2.693716049194336, "learning_rate": 6.135527045756131e-06, "loss": 0.6487, "step": 7859 }, { "epoch": 0.5533262935586061, "grad_norm": 1.477164626121521, "learning_rate": 6.133943180341261e-06, "loss": 0.7204, "step": 7860 }, { "epoch": 0.5533966913058782, "grad_norm": 1.6951420307159424, "learning_rate": 6.13235935995347e-06, "loss": 0.6494, "step": 7861 }, { "epoch": 0.5534670890531503, "grad_norm": 1.5593469142913818, "learning_rate": 6.130775584675102e-06, "loss": 0.7315, "step": 7862 }, { "epoch": 0.5535374868004224, "grad_norm": 1.5054998397827148, "learning_rate": 6.1291918545884965e-06, "loss": 0.7115, "step": 7863 }, { "epoch": 0.5536078845476945, "grad_norm": 1.5793615579605103, "learning_rate": 6.127608169776e-06, "loss": 0.652, "step": 7864 }, { "epoch": 0.5536782822949665, "grad_norm": 1.7108153104782104, "learning_rate": 6.126024530319938e-06, "loss": 0.8334, "step": 7865 }, { "epoch": 0.5537486800422387, "grad_norm": 1.6228687763214111, "learning_rate": 6.124440936302658e-06, "loss": 0.6226, "step": 7866 }, { "epoch": 0.5538190777895108, "grad_norm": 2.0125041007995605, "learning_rate": 6.122857387806484e-06, "loss": 0.6926, "step": 7867 }, { "epoch": 0.5538894755367828, "grad_norm": 2.494385242462158, "learning_rate": 6.121273884913749e-06, "loss": 0.6777, "step": 7868 }, { "epoch": 0.5539598732840549, "grad_norm": 1.736871361732483, "learning_rate": 6.11969042770678e-06, "loss": 0.6219, "step": 7869 }, { "epoch": 0.554030271031327, "grad_norm": 1.700073480606079, "learning_rate": 6.118107016267902e-06, "loss": 0.7423, "step": 7870 }, { "epoch": 0.5541006687785991, "grad_norm": 1.6689077615737915, "learning_rate": 6.116523650679442e-06, "loss": 0.7274, "step": 7871 }, { "epoch": 0.5541710665258712, "grad_norm": 2.0224971771240234, "learning_rate": 6.114940331023716e-06, "loss": 0.7108, "step": 7872 }, { "epoch": 0.5542414642731432, "grad_norm": 1.6471261978149414, "learning_rate": 6.1133570573830446e-06, "loss": 0.6621, "step": 7873 }, { "epoch": 0.5543118620204154, "grad_norm": 1.9003328084945679, "learning_rate": 6.11177382983974e-06, "loss": 0.6419, "step": 7874 }, { "epoch": 0.5543822597676874, "grad_norm": 1.3469488620758057, "learning_rate": 6.110190648476122e-06, "loss": 0.5715, "step": 7875 }, { "epoch": 0.5544526575149595, "grad_norm": 1.8763664960861206, "learning_rate": 6.108607513374496e-06, "loss": 0.6316, "step": 7876 }, { "epoch": 0.5545230552622316, "grad_norm": 1.8494656085968018, "learning_rate": 6.107024424617173e-06, "loss": 0.7565, "step": 7877 }, { "epoch": 0.5545934530095037, "grad_norm": 1.6249184608459473, "learning_rate": 6.105441382286459e-06, "loss": 0.7672, "step": 7878 }, { "epoch": 0.5546638507567758, "grad_norm": 1.6247644424438477, "learning_rate": 6.103858386464659e-06, "loss": 0.6103, "step": 7879 }, { "epoch": 0.5547342485040478, "grad_norm": 2.0301427841186523, "learning_rate": 6.10227543723407e-06, "loss": 0.7038, "step": 7880 }, { "epoch": 0.55480464625132, "grad_norm": 2.321446418762207, "learning_rate": 6.100692534676996e-06, "loss": 0.7637, "step": 7881 }, { "epoch": 0.554875043998592, "grad_norm": 1.702883005142212, "learning_rate": 6.099109678875732e-06, "loss": 0.6676, "step": 7882 }, { "epoch": 0.5549454417458641, "grad_norm": 1.9499701261520386, "learning_rate": 6.097526869912572e-06, "loss": 0.6695, "step": 7883 }, { "epoch": 0.5550158394931363, "grad_norm": 1.8997308015823364, "learning_rate": 6.095944107869806e-06, "loss": 0.6846, "step": 7884 }, { "epoch": 0.5550862372404083, "grad_norm": 1.508699893951416, "learning_rate": 6.094361392829726e-06, "loss": 0.6901, "step": 7885 }, { "epoch": 0.5551566349876804, "grad_norm": 1.8020700216293335, "learning_rate": 6.092778724874615e-06, "loss": 0.7194, "step": 7886 }, { "epoch": 0.5552270327349524, "grad_norm": 1.5317384004592896, "learning_rate": 6.091196104086761e-06, "loss": 0.698, "step": 7887 }, { "epoch": 0.5552974304822246, "grad_norm": 1.609717607498169, "learning_rate": 6.089613530548444e-06, "loss": 0.6938, "step": 7888 }, { "epoch": 0.5553678282294967, "grad_norm": 1.6532469987869263, "learning_rate": 6.0880310043419466e-06, "loss": 0.632, "step": 7889 }, { "epoch": 0.5554382259767687, "grad_norm": 1.6995035409927368, "learning_rate": 6.086448525549541e-06, "loss": 0.6097, "step": 7890 }, { "epoch": 0.5555086237240409, "grad_norm": 1.60550057888031, "learning_rate": 6.084866094253506e-06, "loss": 0.5566, "step": 7891 }, { "epoch": 0.5555790214713129, "grad_norm": 1.702028512954712, "learning_rate": 6.083283710536107e-06, "loss": 0.6571, "step": 7892 }, { "epoch": 0.555649419218585, "grad_norm": 1.7814414501190186, "learning_rate": 6.081701374479623e-06, "loss": 0.723, "step": 7893 }, { "epoch": 0.555719816965857, "grad_norm": 2.0688118934631348, "learning_rate": 6.080119086166314e-06, "loss": 0.8083, "step": 7894 }, { "epoch": 0.5557902147131292, "grad_norm": 1.6852928400039673, "learning_rate": 6.078536845678447e-06, "loss": 0.7056, "step": 7895 }, { "epoch": 0.5558606124604013, "grad_norm": 1.9891263246536255, "learning_rate": 6.076954653098283e-06, "loss": 0.6895, "step": 7896 }, { "epoch": 0.5559310102076733, "grad_norm": 1.9008475542068481, "learning_rate": 6.075372508508085e-06, "loss": 0.6274, "step": 7897 }, { "epoch": 0.5560014079549455, "grad_norm": 1.7858803272247314, "learning_rate": 6.073790411990104e-06, "loss": 0.744, "step": 7898 }, { "epoch": 0.5560718057022175, "grad_norm": 1.5384825468063354, "learning_rate": 6.072208363626602e-06, "loss": 0.5997, "step": 7899 }, { "epoch": 0.5561422034494896, "grad_norm": 1.9604229927062988, "learning_rate": 6.070626363499825e-06, "loss": 0.6303, "step": 7900 }, { "epoch": 0.5562126011967617, "grad_norm": 1.8595439195632935, "learning_rate": 6.069044411692028e-06, "loss": 0.7508, "step": 7901 }, { "epoch": 0.5562829989440338, "grad_norm": 1.8011680841445923, "learning_rate": 6.067462508285452e-06, "loss": 0.7479, "step": 7902 }, { "epoch": 0.5563533966913059, "grad_norm": 1.6678258180618286, "learning_rate": 6.065880653362346e-06, "loss": 0.6622, "step": 7903 }, { "epoch": 0.5564237944385779, "grad_norm": 1.7199361324310303, "learning_rate": 6.0642988470049495e-06, "loss": 0.7401, "step": 7904 }, { "epoch": 0.5564941921858501, "grad_norm": 1.56338369846344, "learning_rate": 6.062717089295505e-06, "loss": 0.6775, "step": 7905 }, { "epoch": 0.5565645899331222, "grad_norm": 1.675711750984192, "learning_rate": 6.061135380316248e-06, "loss": 0.6727, "step": 7906 }, { "epoch": 0.5566349876803942, "grad_norm": 1.6156067848205566, "learning_rate": 6.059553720149413e-06, "loss": 0.5882, "step": 7907 }, { "epoch": 0.5567053854276663, "grad_norm": 1.584352970123291, "learning_rate": 6.05797210887723e-06, "loss": 0.775, "step": 7908 }, { "epoch": 0.5567757831749384, "grad_norm": 1.6168479919433594, "learning_rate": 6.056390546581933e-06, "loss": 0.7491, "step": 7909 }, { "epoch": 0.5568461809222105, "grad_norm": 1.8831716775894165, "learning_rate": 6.054809033345742e-06, "loss": 0.7852, "step": 7910 }, { "epoch": 0.5569165786694826, "grad_norm": 2.036759853363037, "learning_rate": 6.053227569250889e-06, "loss": 0.6338, "step": 7911 }, { "epoch": 0.5569869764167547, "grad_norm": 2.245087146759033, "learning_rate": 6.051646154379591e-06, "loss": 0.6314, "step": 7912 }, { "epoch": 0.5570573741640268, "grad_norm": 1.5066320896148682, "learning_rate": 6.050064788814069e-06, "loss": 0.7276, "step": 7913 }, { "epoch": 0.5571277719112988, "grad_norm": 1.5262733697891235, "learning_rate": 6.048483472636537e-06, "loss": 0.6712, "step": 7914 }, { "epoch": 0.5571981696585709, "grad_norm": 1.693677306175232, "learning_rate": 6.046902205929214e-06, "loss": 0.5929, "step": 7915 }, { "epoch": 0.557268567405843, "grad_norm": 1.5693113803863525, "learning_rate": 6.045320988774304e-06, "loss": 0.651, "step": 7916 }, { "epoch": 0.5573389651531151, "grad_norm": 2.1736855506896973, "learning_rate": 6.043739821254025e-06, "loss": 0.6768, "step": 7917 }, { "epoch": 0.5574093629003872, "grad_norm": 1.8210651874542236, "learning_rate": 6.042158703450575e-06, "loss": 0.6753, "step": 7918 }, { "epoch": 0.5574797606476593, "grad_norm": 2.080321788787842, "learning_rate": 6.040577635446165e-06, "loss": 0.633, "step": 7919 }, { "epoch": 0.5575501583949314, "grad_norm": 1.5413209199905396, "learning_rate": 6.038996617322989e-06, "loss": 0.8054, "step": 7920 }, { "epoch": 0.5576205561422034, "grad_norm": 2.0382888317108154, "learning_rate": 6.03741564916325e-06, "loss": 0.6002, "step": 7921 }, { "epoch": 0.5576909538894755, "grad_norm": 1.9797132015228271, "learning_rate": 6.0358347310491455e-06, "loss": 0.8636, "step": 7922 }, { "epoch": 0.5577613516367477, "grad_norm": 1.9003655910491943, "learning_rate": 6.034253863062864e-06, "loss": 0.7187, "step": 7923 }, { "epoch": 0.5578317493840197, "grad_norm": 1.9913098812103271, "learning_rate": 6.0326730452866014e-06, "loss": 0.5796, "step": 7924 }, { "epoch": 0.5579021471312918, "grad_norm": 1.6400822401046753, "learning_rate": 6.03109227780254e-06, "loss": 0.6708, "step": 7925 }, { "epoch": 0.5579725448785638, "grad_norm": 1.8751401901245117, "learning_rate": 6.029511560692872e-06, "loss": 0.6449, "step": 7926 }, { "epoch": 0.558042942625836, "grad_norm": 1.8307408094406128, "learning_rate": 6.027930894039775e-06, "loss": 0.6453, "step": 7927 }, { "epoch": 0.5581133403731081, "grad_norm": 1.7636337280273438, "learning_rate": 6.026350277925433e-06, "loss": 0.7186, "step": 7928 }, { "epoch": 0.5581837381203801, "grad_norm": 1.6545605659484863, "learning_rate": 6.0247697124320196e-06, "loss": 0.6821, "step": 7929 }, { "epoch": 0.5582541358676523, "grad_norm": 1.826183557510376, "learning_rate": 6.023189197641714e-06, "loss": 0.7317, "step": 7930 }, { "epoch": 0.5583245336149243, "grad_norm": 2.13765549659729, "learning_rate": 6.021608733636683e-06, "loss": 0.6969, "step": 7931 }, { "epoch": 0.5583949313621964, "grad_norm": 2.062459707260132, "learning_rate": 6.020028320499103e-06, "loss": 0.7376, "step": 7932 }, { "epoch": 0.5584653291094684, "grad_norm": 1.8655294179916382, "learning_rate": 6.018447958311138e-06, "loss": 0.7229, "step": 7933 }, { "epoch": 0.5585357268567406, "grad_norm": 1.6662014722824097, "learning_rate": 6.016867647154954e-06, "loss": 0.6873, "step": 7934 }, { "epoch": 0.5586061246040127, "grad_norm": 1.3693822622299194, "learning_rate": 6.015287387112708e-06, "loss": 0.7679, "step": 7935 }, { "epoch": 0.5586765223512847, "grad_norm": 2.117248773574829, "learning_rate": 6.013707178266566e-06, "loss": 0.6799, "step": 7936 }, { "epoch": 0.5587469200985569, "grad_norm": 1.6862576007843018, "learning_rate": 6.0121270206986774e-06, "loss": 0.6582, "step": 7937 }, { "epoch": 0.5588173178458289, "grad_norm": 1.497038722038269, "learning_rate": 6.010546914491202e-06, "loss": 0.7092, "step": 7938 }, { "epoch": 0.558887715593101, "grad_norm": 2.0590436458587646, "learning_rate": 6.008966859726287e-06, "loss": 0.6776, "step": 7939 }, { "epoch": 0.5589581133403732, "grad_norm": 1.5705978870391846, "learning_rate": 6.007386856486086e-06, "loss": 0.7331, "step": 7940 }, { "epoch": 0.5590285110876452, "grad_norm": 1.7697029113769531, "learning_rate": 6.005806904852738e-06, "loss": 0.6968, "step": 7941 }, { "epoch": 0.5590989088349173, "grad_norm": 1.9237359762191772, "learning_rate": 6.004227004908391e-06, "loss": 0.6228, "step": 7942 }, { "epoch": 0.5591693065821893, "grad_norm": 1.983215093612671, "learning_rate": 6.002647156735182e-06, "loss": 0.7387, "step": 7943 }, { "epoch": 0.5592397043294615, "grad_norm": 1.8781371116638184, "learning_rate": 6.001067360415252e-06, "loss": 0.6652, "step": 7944 }, { "epoch": 0.5593101020767336, "grad_norm": 1.8556292057037354, "learning_rate": 5.9994876160307345e-06, "loss": 0.69, "step": 7945 }, { "epoch": 0.5593804998240056, "grad_norm": 1.7247766256332397, "learning_rate": 5.997907923663762e-06, "loss": 0.7291, "step": 7946 }, { "epoch": 0.5594508975712777, "grad_norm": 1.7627551555633545, "learning_rate": 5.996328283396464e-06, "loss": 0.5833, "step": 7947 }, { "epoch": 0.5595212953185498, "grad_norm": 1.7377054691314697, "learning_rate": 5.994748695310969e-06, "loss": 0.6348, "step": 7948 }, { "epoch": 0.5595916930658219, "grad_norm": 2.0201780796051025, "learning_rate": 5.993169159489396e-06, "loss": 0.7364, "step": 7949 }, { "epoch": 0.5596620908130939, "grad_norm": 1.8187053203582764, "learning_rate": 5.991589676013873e-06, "loss": 0.661, "step": 7950 }, { "epoch": 0.5597324885603661, "grad_norm": 1.8144654035568237, "learning_rate": 5.990010244966515e-06, "loss": 0.5182, "step": 7951 }, { "epoch": 0.5598028863076382, "grad_norm": 2.0133955478668213, "learning_rate": 5.988430866429441e-06, "loss": 0.6452, "step": 7952 }, { "epoch": 0.5598732840549102, "grad_norm": 1.8166987895965576, "learning_rate": 5.9868515404847605e-06, "loss": 0.6887, "step": 7953 }, { "epoch": 0.5599436818021823, "grad_norm": 2.2275285720825195, "learning_rate": 5.985272267214587e-06, "loss": 0.7394, "step": 7954 }, { "epoch": 0.5600140795494544, "grad_norm": 1.905859112739563, "learning_rate": 5.983693046701024e-06, "loss": 0.5661, "step": 7955 }, { "epoch": 0.5600844772967265, "grad_norm": 1.9039393663406372, "learning_rate": 5.982113879026185e-06, "loss": 0.802, "step": 7956 }, { "epoch": 0.5601548750439986, "grad_norm": 2.0951530933380127, "learning_rate": 5.980534764272164e-06, "loss": 0.7596, "step": 7957 }, { "epoch": 0.5602252727912707, "grad_norm": 1.8769793510437012, "learning_rate": 5.978955702521065e-06, "loss": 0.6885, "step": 7958 }, { "epoch": 0.5602956705385428, "grad_norm": 1.6829839944839478, "learning_rate": 5.977376693854984e-06, "loss": 0.6187, "step": 7959 }, { "epoch": 0.5603660682858148, "grad_norm": 1.745316743850708, "learning_rate": 5.975797738356015e-06, "loss": 0.573, "step": 7960 }, { "epoch": 0.5604364660330869, "grad_norm": 1.6168361902236938, "learning_rate": 5.974218836106247e-06, "loss": 0.6525, "step": 7961 }, { "epoch": 0.5605068637803591, "grad_norm": 1.9214814901351929, "learning_rate": 5.972639987187773e-06, "loss": 0.7237, "step": 7962 }, { "epoch": 0.5605772615276311, "grad_norm": 1.498218297958374, "learning_rate": 5.971061191682675e-06, "loss": 0.5769, "step": 7963 }, { "epoch": 0.5606476592749032, "grad_norm": 1.6165777444839478, "learning_rate": 5.969482449673039e-06, "loss": 0.6724, "step": 7964 }, { "epoch": 0.5607180570221753, "grad_norm": 1.5349630117416382, "learning_rate": 5.967903761240943e-06, "loss": 0.6003, "step": 7965 }, { "epoch": 0.5607884547694474, "grad_norm": 1.69106125831604, "learning_rate": 5.966325126468466e-06, "loss": 0.6117, "step": 7966 }, { "epoch": 0.5608588525167195, "grad_norm": 1.8375771045684814, "learning_rate": 5.964746545437679e-06, "loss": 0.7374, "step": 7967 }, { "epoch": 0.5609292502639915, "grad_norm": 1.7580147981643677, "learning_rate": 5.9631680182306595e-06, "loss": 0.6896, "step": 7968 }, { "epoch": 0.5609996480112637, "grad_norm": 1.6259013414382935, "learning_rate": 5.961589544929473e-06, "loss": 0.6659, "step": 7969 }, { "epoch": 0.5610700457585357, "grad_norm": 1.7657780647277832, "learning_rate": 5.960011125616184e-06, "loss": 0.609, "step": 7970 }, { "epoch": 0.5611404435058078, "grad_norm": 1.7017349004745483, "learning_rate": 5.958432760372862e-06, "loss": 0.5578, "step": 7971 }, { "epoch": 0.5612108412530799, "grad_norm": 1.7058886289596558, "learning_rate": 5.956854449281562e-06, "loss": 0.6931, "step": 7972 }, { "epoch": 0.561281239000352, "grad_norm": 2.355654001235962, "learning_rate": 5.955276192424346e-06, "loss": 0.732, "step": 7973 }, { "epoch": 0.5613516367476241, "grad_norm": 1.742568016052246, "learning_rate": 5.953697989883265e-06, "loss": 0.7647, "step": 7974 }, { "epoch": 0.5614220344948961, "grad_norm": 1.7335954904556274, "learning_rate": 5.952119841740375e-06, "loss": 0.7305, "step": 7975 }, { "epoch": 0.5614924322421683, "grad_norm": 2.27711820602417, "learning_rate": 5.950541748077721e-06, "loss": 0.6409, "step": 7976 }, { "epoch": 0.5615628299894403, "grad_norm": 1.5322604179382324, "learning_rate": 5.948963708977354e-06, "loss": 0.7256, "step": 7977 }, { "epoch": 0.5616332277367124, "grad_norm": 2.0684492588043213, "learning_rate": 5.947385724521314e-06, "loss": 0.6971, "step": 7978 }, { "epoch": 0.5617036254839846, "grad_norm": 1.8770661354064941, "learning_rate": 5.9458077947916455e-06, "loss": 0.6825, "step": 7979 }, { "epoch": 0.5617740232312566, "grad_norm": 1.6417081356048584, "learning_rate": 5.9442299198703835e-06, "loss": 0.6335, "step": 7980 }, { "epoch": 0.5618444209785287, "grad_norm": 1.6648975610733032, "learning_rate": 5.9426520998395645e-06, "loss": 0.8031, "step": 7981 }, { "epoch": 0.5619148187258007, "grad_norm": 1.7941856384277344, "learning_rate": 5.941074334781218e-06, "loss": 0.6504, "step": 7982 }, { "epoch": 0.5619852164730729, "grad_norm": 1.6762281656265259, "learning_rate": 5.939496624777379e-06, "loss": 0.7527, "step": 7983 }, { "epoch": 0.562055614220345, "grad_norm": 1.7374897003173828, "learning_rate": 5.937918969910068e-06, "loss": 0.8159, "step": 7984 }, { "epoch": 0.562126011967617, "grad_norm": 1.5118826627731323, "learning_rate": 5.936341370261316e-06, "loss": 0.604, "step": 7985 }, { "epoch": 0.5621964097148892, "grad_norm": 1.980568289756775, "learning_rate": 5.934763825913136e-06, "loss": 0.6699, "step": 7986 }, { "epoch": 0.5622668074621612, "grad_norm": 1.874399185180664, "learning_rate": 5.933186336947551e-06, "loss": 0.5865, "step": 7987 }, { "epoch": 0.5623372052094333, "grad_norm": 1.669342279434204, "learning_rate": 5.931608903446572e-06, "loss": 0.6376, "step": 7988 }, { "epoch": 0.5624076029567053, "grad_norm": 2.170628786087036, "learning_rate": 5.930031525492216e-06, "loss": 0.6692, "step": 7989 }, { "epoch": 0.5624780007039775, "grad_norm": 1.756642460823059, "learning_rate": 5.9284542031664895e-06, "loss": 0.5137, "step": 7990 }, { "epoch": 0.5625483984512496, "grad_norm": 1.748650312423706, "learning_rate": 5.9268769365514e-06, "loss": 0.668, "step": 7991 }, { "epoch": 0.5626187961985216, "grad_norm": 1.8741737604141235, "learning_rate": 5.925299725728949e-06, "loss": 0.5808, "step": 7992 }, { "epoch": 0.5626891939457938, "grad_norm": 2.2097153663635254, "learning_rate": 5.923722570781141e-06, "loss": 0.68, "step": 7993 }, { "epoch": 0.5627595916930658, "grad_norm": 1.6831471920013428, "learning_rate": 5.922145471789967e-06, "loss": 0.7248, "step": 7994 }, { "epoch": 0.5628299894403379, "grad_norm": 1.8153493404388428, "learning_rate": 5.92056842883743e-06, "loss": 0.6609, "step": 7995 }, { "epoch": 0.56290038718761, "grad_norm": 1.974644660949707, "learning_rate": 5.918991442005517e-06, "loss": 0.5879, "step": 7996 }, { "epoch": 0.5629707849348821, "grad_norm": 2.070871114730835, "learning_rate": 5.917414511376218e-06, "loss": 0.6645, "step": 7997 }, { "epoch": 0.5630411826821542, "grad_norm": 1.852425456047058, "learning_rate": 5.915837637031518e-06, "loss": 0.712, "step": 7998 }, { "epoch": 0.5631115804294262, "grad_norm": 2.1686923503875732, "learning_rate": 5.9142608190534034e-06, "loss": 0.6717, "step": 7999 }, { "epoch": 0.5631819781766984, "grad_norm": 1.632304072380066, "learning_rate": 5.912684057523848e-06, "loss": 0.722, "step": 8000 }, { "epoch": 0.5632523759239705, "grad_norm": 1.6945463418960571, "learning_rate": 5.911107352524837e-06, "loss": 0.6435, "step": 8001 }, { "epoch": 0.5633227736712425, "grad_norm": 1.7457093000411987, "learning_rate": 5.9095307041383394e-06, "loss": 0.5957, "step": 8002 }, { "epoch": 0.5633931714185146, "grad_norm": 1.9915016889572144, "learning_rate": 5.907954112446329e-06, "loss": 0.6193, "step": 8003 }, { "epoch": 0.5634635691657867, "grad_norm": 1.5440304279327393, "learning_rate": 5.906377577530771e-06, "loss": 0.6912, "step": 8004 }, { "epoch": 0.5635339669130588, "grad_norm": 1.7858909368515015, "learning_rate": 5.904801099473637e-06, "loss": 0.6369, "step": 8005 }, { "epoch": 0.5636043646603308, "grad_norm": 1.8517088890075684, "learning_rate": 5.903224678356882e-06, "loss": 0.743, "step": 8006 }, { "epoch": 0.563674762407603, "grad_norm": 1.6636650562286377, "learning_rate": 5.901648314262471e-06, "loss": 0.7854, "step": 8007 }, { "epoch": 0.5637451601548751, "grad_norm": 1.635767936706543, "learning_rate": 5.900072007272358e-06, "loss": 0.8092, "step": 8008 }, { "epoch": 0.5638155579021471, "grad_norm": 1.8911882638931274, "learning_rate": 5.898495757468499e-06, "loss": 0.7298, "step": 8009 }, { "epoch": 0.5638859556494192, "grad_norm": 1.6515145301818848, "learning_rate": 5.896919564932842e-06, "loss": 0.6593, "step": 8010 }, { "epoch": 0.5639563533966913, "grad_norm": 1.8184762001037598, "learning_rate": 5.895343429747337e-06, "loss": 0.6766, "step": 8011 }, { "epoch": 0.5640267511439634, "grad_norm": 1.4885756969451904, "learning_rate": 5.8937673519939255e-06, "loss": 0.6093, "step": 8012 }, { "epoch": 0.5640971488912355, "grad_norm": 2.0468223094940186, "learning_rate": 5.892191331754553e-06, "loss": 0.6486, "step": 8013 }, { "epoch": 0.5641675466385075, "grad_norm": 1.823989748954773, "learning_rate": 5.8906153691111554e-06, "loss": 0.6621, "step": 8014 }, { "epoch": 0.5642379443857797, "grad_norm": 1.7834594249725342, "learning_rate": 5.88903946414567e-06, "loss": 0.7715, "step": 8015 }, { "epoch": 0.5643083421330517, "grad_norm": 1.958844542503357, "learning_rate": 5.887463616940029e-06, "loss": 0.679, "step": 8016 }, { "epoch": 0.5643787398803238, "grad_norm": 1.7363585233688354, "learning_rate": 5.885887827576163e-06, "loss": 0.6168, "step": 8017 }, { "epoch": 0.564449137627596, "grad_norm": 1.6450824737548828, "learning_rate": 5.884312096135995e-06, "loss": 0.5557, "step": 8018 }, { "epoch": 0.564519535374868, "grad_norm": 1.9061498641967773, "learning_rate": 5.882736422701454e-06, "loss": 0.7686, "step": 8019 }, { "epoch": 0.5645899331221401, "grad_norm": 3.9072365760803223, "learning_rate": 5.881160807354457e-06, "loss": 0.7204, "step": 8020 }, { "epoch": 0.5646603308694121, "grad_norm": 1.7166398763656616, "learning_rate": 5.8795852501769215e-06, "loss": 0.742, "step": 8021 }, { "epoch": 0.5647307286166843, "grad_norm": 1.8614035844802856, "learning_rate": 5.878009751250767e-06, "loss": 0.7228, "step": 8022 }, { "epoch": 0.5648011263639564, "grad_norm": 2.0024378299713135, "learning_rate": 5.8764343106579e-06, "loss": 0.5854, "step": 8023 }, { "epoch": 0.5648715241112284, "grad_norm": 2.3426942825317383, "learning_rate": 5.874858928480232e-06, "loss": 0.7305, "step": 8024 }, { "epoch": 0.5649419218585006, "grad_norm": 2.3908426761627197, "learning_rate": 5.873283604799666e-06, "loss": 0.7874, "step": 8025 }, { "epoch": 0.5650123196057726, "grad_norm": 1.6863834857940674, "learning_rate": 5.871708339698107e-06, "loss": 0.6951, "step": 8026 }, { "epoch": 0.5650827173530447, "grad_norm": 2.3582000732421875, "learning_rate": 5.87013313325745e-06, "loss": 0.7462, "step": 8027 }, { "epoch": 0.5651531151003167, "grad_norm": 1.8845301866531372, "learning_rate": 5.868557985559599e-06, "loss": 0.6994, "step": 8028 }, { "epoch": 0.5652235128475889, "grad_norm": 1.6765400171279907, "learning_rate": 5.86698289668644e-06, "loss": 0.8201, "step": 8029 }, { "epoch": 0.565293910594861, "grad_norm": 1.8844454288482666, "learning_rate": 5.865407866719868e-06, "loss": 0.7003, "step": 8030 }, { "epoch": 0.565364308342133, "grad_norm": 1.9080604314804077, "learning_rate": 5.863832895741768e-06, "loss": 0.7826, "step": 8031 }, { "epoch": 0.5654347060894052, "grad_norm": 1.9972426891326904, "learning_rate": 5.862257983834025e-06, "loss": 0.7453, "step": 8032 }, { "epoch": 0.5655051038366772, "grad_norm": 1.8196810483932495, "learning_rate": 5.860683131078518e-06, "loss": 0.7026, "step": 8033 }, { "epoch": 0.5655755015839493, "grad_norm": 1.8084654808044434, "learning_rate": 5.85910833755713e-06, "loss": 0.6053, "step": 8034 }, { "epoch": 0.5656458993312214, "grad_norm": 2.063110589981079, "learning_rate": 5.857533603351731e-06, "loss": 0.5979, "step": 8035 }, { "epoch": 0.5657162970784935, "grad_norm": 1.9057331085205078, "learning_rate": 5.855958928544195e-06, "loss": 0.75, "step": 8036 }, { "epoch": 0.5657866948257656, "grad_norm": 1.8351106643676758, "learning_rate": 5.854384313216389e-06, "loss": 0.6529, "step": 8037 }, { "epoch": 0.5658570925730376, "grad_norm": 1.6137727499008179, "learning_rate": 5.852809757450183e-06, "loss": 0.7517, "step": 8038 }, { "epoch": 0.5659274903203098, "grad_norm": 1.8229238986968994, "learning_rate": 5.851235261327433e-06, "loss": 0.7109, "step": 8039 }, { "epoch": 0.5659978880675819, "grad_norm": 2.202348232269287, "learning_rate": 5.849660824930004e-06, "loss": 0.6136, "step": 8040 }, { "epoch": 0.5660682858148539, "grad_norm": 1.9608862400054932, "learning_rate": 5.848086448339751e-06, "loss": 0.696, "step": 8041 }, { "epoch": 0.566138683562126, "grad_norm": 1.9533987045288086, "learning_rate": 5.846512131638527e-06, "loss": 0.7399, "step": 8042 }, { "epoch": 0.5662090813093981, "grad_norm": 2.0814077854156494, "learning_rate": 5.844937874908181e-06, "loss": 0.7363, "step": 8043 }, { "epoch": 0.5662794790566702, "grad_norm": 2.2985100746154785, "learning_rate": 5.843363678230562e-06, "loss": 0.6788, "step": 8044 }, { "epoch": 0.5663498768039422, "grad_norm": 1.7289729118347168, "learning_rate": 5.84178954168751e-06, "loss": 0.65, "step": 8045 }, { "epoch": 0.5664202745512144, "grad_norm": 1.7576593160629272, "learning_rate": 5.840215465360872e-06, "loss": 0.6361, "step": 8046 }, { "epoch": 0.5664906722984865, "grad_norm": 1.4652199745178223, "learning_rate": 5.838641449332481e-06, "loss": 0.679, "step": 8047 }, { "epoch": 0.5665610700457585, "grad_norm": 1.2343019247055054, "learning_rate": 5.837067493684174e-06, "loss": 0.6878, "step": 8048 }, { "epoch": 0.5666314677930306, "grad_norm": 1.8032689094543457, "learning_rate": 5.83549359849778e-06, "loss": 0.7615, "step": 8049 }, { "epoch": 0.5667018655403027, "grad_norm": 1.8691171407699585, "learning_rate": 5.83391976385513e-06, "loss": 0.7338, "step": 8050 }, { "epoch": 0.5667722632875748, "grad_norm": 2.2192437648773193, "learning_rate": 5.832345989838043e-06, "loss": 0.7129, "step": 8051 }, { "epoch": 0.5668426610348469, "grad_norm": 1.7518559694290161, "learning_rate": 5.83077227652835e-06, "loss": 0.6778, "step": 8052 }, { "epoch": 0.566913058782119, "grad_norm": 1.8813222646713257, "learning_rate": 5.829198624007864e-06, "loss": 0.7217, "step": 8053 }, { "epoch": 0.5669834565293911, "grad_norm": 2.211686849594116, "learning_rate": 5.827625032358402e-06, "loss": 0.738, "step": 8054 }, { "epoch": 0.5670538542766631, "grad_norm": 2.329256534576416, "learning_rate": 5.826051501661776e-06, "loss": 0.7365, "step": 8055 }, { "epoch": 0.5671242520239352, "grad_norm": 1.531620740890503, "learning_rate": 5.824478031999797e-06, "loss": 0.6343, "step": 8056 }, { "epoch": 0.5671946497712074, "grad_norm": 1.7742172479629517, "learning_rate": 5.8229046234542664e-06, "loss": 0.6319, "step": 8057 }, { "epoch": 0.5672650475184794, "grad_norm": 2.907712459564209, "learning_rate": 5.821331276106994e-06, "loss": 0.7352, "step": 8058 }, { "epoch": 0.5673354452657515, "grad_norm": 1.7791705131530762, "learning_rate": 5.819757990039774e-06, "loss": 0.6297, "step": 8059 }, { "epoch": 0.5674058430130235, "grad_norm": 1.8914343118667603, "learning_rate": 5.818184765334407e-06, "loss": 0.7111, "step": 8060 }, { "epoch": 0.5674762407602957, "grad_norm": 1.8173452615737915, "learning_rate": 5.816611602072682e-06, "loss": 0.5908, "step": 8061 }, { "epoch": 0.5675466385075678, "grad_norm": 1.5978857278823853, "learning_rate": 5.815038500336394e-06, "loss": 0.6885, "step": 8062 }, { "epoch": 0.5676170362548398, "grad_norm": 1.670300006866455, "learning_rate": 5.813465460207323e-06, "loss": 0.7123, "step": 8063 }, { "epoch": 0.567687434002112, "grad_norm": 12.119657516479492, "learning_rate": 5.8118924817672614e-06, "loss": 0.7516, "step": 8064 }, { "epoch": 0.567757831749384, "grad_norm": 1.9405988454818726, "learning_rate": 5.810319565097984e-06, "loss": 0.6908, "step": 8065 }, { "epoch": 0.5678282294966561, "grad_norm": 1.9134366512298584, "learning_rate": 5.808746710281272e-06, "loss": 0.5622, "step": 8066 }, { "epoch": 0.5678986272439281, "grad_norm": 1.795207142829895, "learning_rate": 5.807173917398895e-06, "loss": 0.6403, "step": 8067 }, { "epoch": 0.5679690249912003, "grad_norm": 1.5551453828811646, "learning_rate": 5.8056011865326285e-06, "loss": 0.6548, "step": 8068 }, { "epoch": 0.5680394227384724, "grad_norm": 1.9095842838287354, "learning_rate": 5.8040285177642345e-06, "loss": 0.6782, "step": 8069 }, { "epoch": 0.5681098204857444, "grad_norm": 1.7711478471755981, "learning_rate": 5.802455911175484e-06, "loss": 0.6824, "step": 8070 }, { "epoch": 0.5681802182330166, "grad_norm": 1.954138159751892, "learning_rate": 5.800883366848134e-06, "loss": 0.6621, "step": 8071 }, { "epoch": 0.5682506159802886, "grad_norm": 1.641762375831604, "learning_rate": 5.799310884863943e-06, "loss": 0.5183, "step": 8072 }, { "epoch": 0.5683210137275607, "grad_norm": 2.1440935134887695, "learning_rate": 5.79773846530467e-06, "loss": 0.7574, "step": 8073 }, { "epoch": 0.5683914114748329, "grad_norm": 1.8141509294509888, "learning_rate": 5.796166108252058e-06, "loss": 0.6859, "step": 8074 }, { "epoch": 0.5684618092221049, "grad_norm": 1.6928112506866455, "learning_rate": 5.794593813787865e-06, "loss": 0.5669, "step": 8075 }, { "epoch": 0.568532206969377, "grad_norm": 1.8217519521713257, "learning_rate": 5.793021581993828e-06, "loss": 0.6889, "step": 8076 }, { "epoch": 0.568602604716649, "grad_norm": 2.1448256969451904, "learning_rate": 5.791449412951694e-06, "loss": 0.5965, "step": 8077 }, { "epoch": 0.5686730024639212, "grad_norm": 1.5726782083511353, "learning_rate": 5.789877306743198e-06, "loss": 0.7748, "step": 8078 }, { "epoch": 0.5687434002111933, "grad_norm": 1.774440050125122, "learning_rate": 5.788305263450078e-06, "loss": 0.5501, "step": 8079 }, { "epoch": 0.5688137979584653, "grad_norm": 2.2894399166107178, "learning_rate": 5.786733283154064e-06, "loss": 0.6726, "step": 8080 }, { "epoch": 0.5688841957057374, "grad_norm": 1.7112470865249634, "learning_rate": 5.785161365936887e-06, "loss": 0.6936, "step": 8081 }, { "epoch": 0.5689545934530095, "grad_norm": 1.8500607013702393, "learning_rate": 5.7835895118802686e-06, "loss": 0.6149, "step": 8082 }, { "epoch": 0.5690249912002816, "grad_norm": 2.1945598125457764, "learning_rate": 5.782017721065936e-06, "loss": 0.7967, "step": 8083 }, { "epoch": 0.5690953889475536, "grad_norm": 1.7712244987487793, "learning_rate": 5.780445993575601e-06, "loss": 0.6958, "step": 8084 }, { "epoch": 0.5691657866948258, "grad_norm": 1.9915916919708252, "learning_rate": 5.778874329490987e-06, "loss": 0.6951, "step": 8085 }, { "epoch": 0.5692361844420979, "grad_norm": 2.1172213554382324, "learning_rate": 5.7773027288938005e-06, "loss": 0.6194, "step": 8086 }, { "epoch": 0.5693065821893699, "grad_norm": 1.949658989906311, "learning_rate": 5.775731191865754e-06, "loss": 0.6513, "step": 8087 }, { "epoch": 0.569376979936642, "grad_norm": 1.748931646347046, "learning_rate": 5.77415971848855e-06, "loss": 0.65, "step": 8088 }, { "epoch": 0.5694473776839141, "grad_norm": 1.9200104475021362, "learning_rate": 5.772588308843895e-06, "loss": 0.6614, "step": 8089 }, { "epoch": 0.5695177754311862, "grad_norm": 2.379971504211426, "learning_rate": 5.771016963013481e-06, "loss": 0.7204, "step": 8090 }, { "epoch": 0.5695881731784583, "grad_norm": 1.9537075757980347, "learning_rate": 5.769445681079011e-06, "loss": 0.745, "step": 8091 }, { "epoch": 0.5696585709257304, "grad_norm": 1.9772095680236816, "learning_rate": 5.767874463122174e-06, "loss": 0.6636, "step": 8092 }, { "epoch": 0.5697289686730025, "grad_norm": 1.6935769319534302, "learning_rate": 5.7663033092246605e-06, "loss": 0.6161, "step": 8093 }, { "epoch": 0.5697993664202745, "grad_norm": 1.5233746767044067, "learning_rate": 5.7647322194681536e-06, "loss": 0.6547, "step": 8094 }, { "epoch": 0.5698697641675466, "grad_norm": 1.8158254623413086, "learning_rate": 5.7631611939343395e-06, "loss": 0.6366, "step": 8095 }, { "epoch": 0.5699401619148188, "grad_norm": 1.897362232208252, "learning_rate": 5.761590232704892e-06, "loss": 0.7039, "step": 8096 }, { "epoch": 0.5700105596620908, "grad_norm": 1.790728211402893, "learning_rate": 5.760019335861493e-06, "loss": 0.5871, "step": 8097 }, { "epoch": 0.5700809574093629, "grad_norm": 2.2662363052368164, "learning_rate": 5.75844850348581e-06, "loss": 0.6885, "step": 8098 }, { "epoch": 0.570151355156635, "grad_norm": 1.8791894912719727, "learning_rate": 5.756877735659514e-06, "loss": 0.7595, "step": 8099 }, { "epoch": 0.5702217529039071, "grad_norm": 1.7183377742767334, "learning_rate": 5.755307032464271e-06, "loss": 0.5573, "step": 8100 }, { "epoch": 0.5702921506511791, "grad_norm": 1.7903143167495728, "learning_rate": 5.753736393981742e-06, "loss": 0.6341, "step": 8101 }, { "epoch": 0.5703625483984512, "grad_norm": 2.0109353065490723, "learning_rate": 5.752165820293585e-06, "loss": 0.6693, "step": 8102 }, { "epoch": 0.5704329461457234, "grad_norm": 1.944618582725525, "learning_rate": 5.75059531148146e-06, "loss": 0.6606, "step": 8103 }, { "epoch": 0.5705033438929954, "grad_norm": 2.2054436206817627, "learning_rate": 5.7490248676270145e-06, "loss": 0.6955, "step": 8104 }, { "epoch": 0.5705737416402675, "grad_norm": 1.7767367362976074, "learning_rate": 5.747454488811901e-06, "loss": 0.6406, "step": 8105 }, { "epoch": 0.5706441393875396, "grad_norm": 1.6355843544006348, "learning_rate": 5.745884175117761e-06, "loss": 0.5783, "step": 8106 }, { "epoch": 0.5707145371348117, "grad_norm": 1.7044286727905273, "learning_rate": 5.74431392662624e-06, "loss": 0.7362, "step": 8107 }, { "epoch": 0.5707849348820838, "grad_norm": 1.6245155334472656, "learning_rate": 5.742743743418972e-06, "loss": 0.7232, "step": 8108 }, { "epoch": 0.5708553326293558, "grad_norm": 1.5250318050384521, "learning_rate": 5.741173625577598e-06, "loss": 0.6306, "step": 8109 }, { "epoch": 0.570925730376628, "grad_norm": 1.8652479648590088, "learning_rate": 5.739603573183747e-06, "loss": 0.6656, "step": 8110 }, { "epoch": 0.5709961281239, "grad_norm": 1.729027271270752, "learning_rate": 5.73803358631905e-06, "loss": 0.6319, "step": 8111 }, { "epoch": 0.5710665258711721, "grad_norm": 1.7715139389038086, "learning_rate": 5.7364636650651265e-06, "loss": 0.6893, "step": 8112 }, { "epoch": 0.5711369236184443, "grad_norm": 1.8506479263305664, "learning_rate": 5.734893809503603e-06, "loss": 0.7174, "step": 8113 }, { "epoch": 0.5712073213657163, "grad_norm": 1.8871264457702637, "learning_rate": 5.733324019716093e-06, "loss": 0.6385, "step": 8114 }, { "epoch": 0.5712777191129884, "grad_norm": 1.7246164083480835, "learning_rate": 5.731754295784218e-06, "loss": 0.7702, "step": 8115 }, { "epoch": 0.5713481168602604, "grad_norm": 1.5287353992462158, "learning_rate": 5.730184637789584e-06, "loss": 0.669, "step": 8116 }, { "epoch": 0.5714185146075326, "grad_norm": 4.823519706726074, "learning_rate": 5.728615045813803e-06, "loss": 0.7244, "step": 8117 }, { "epoch": 0.5714889123548047, "grad_norm": 1.9422423839569092, "learning_rate": 5.727045519938474e-06, "loss": 0.7403, "step": 8118 }, { "epoch": 0.5715593101020767, "grad_norm": 1.8559401035308838, "learning_rate": 5.725476060245204e-06, "loss": 0.7102, "step": 8119 }, { "epoch": 0.5716297078493489, "grad_norm": 1.7097687721252441, "learning_rate": 5.723906666815585e-06, "loss": 0.6573, "step": 8120 }, { "epoch": 0.5717001055966209, "grad_norm": 1.731164574623108, "learning_rate": 5.722337339731215e-06, "loss": 0.6558, "step": 8121 }, { "epoch": 0.571770503343893, "grad_norm": 1.9202227592468262, "learning_rate": 5.720768079073686e-06, "loss": 0.634, "step": 8122 }, { "epoch": 0.571840901091165, "grad_norm": 1.6523429155349731, "learning_rate": 5.71919888492458e-06, "loss": 0.7238, "step": 8123 }, { "epoch": 0.5719112988384372, "grad_norm": 1.6720142364501953, "learning_rate": 5.7176297573654875e-06, "loss": 0.6996, "step": 8124 }, { "epoch": 0.5719816965857093, "grad_norm": 1.8736917972564697, "learning_rate": 5.7160606964779815e-06, "loss": 0.6947, "step": 8125 }, { "epoch": 0.5720520943329813, "grad_norm": 1.526715636253357, "learning_rate": 5.714491702343645e-06, "loss": 0.588, "step": 8126 }, { "epoch": 0.5721224920802535, "grad_norm": 2.2151427268981934, "learning_rate": 5.712922775044048e-06, "loss": 0.8473, "step": 8127 }, { "epoch": 0.5721928898275255, "grad_norm": 2.178990364074707, "learning_rate": 5.711353914660764e-06, "loss": 0.6591, "step": 8128 }, { "epoch": 0.5722632875747976, "grad_norm": 1.8896424770355225, "learning_rate": 5.709785121275353e-06, "loss": 0.7649, "step": 8129 }, { "epoch": 0.5723336853220697, "grad_norm": 1.9149203300476074, "learning_rate": 5.708216394969385e-06, "loss": 0.7582, "step": 8130 }, { "epoch": 0.5724040830693418, "grad_norm": 1.8181264400482178, "learning_rate": 5.706647735824416e-06, "loss": 0.6763, "step": 8131 }, { "epoch": 0.5724744808166139, "grad_norm": 1.684678554534912, "learning_rate": 5.705079143922004e-06, "loss": 0.648, "step": 8132 }, { "epoch": 0.5725448785638859, "grad_norm": 1.6635394096374512, "learning_rate": 5.703510619343697e-06, "loss": 0.6746, "step": 8133 }, { "epoch": 0.572615276311158, "grad_norm": 2.103398084640503, "learning_rate": 5.70194216217105e-06, "loss": 0.6563, "step": 8134 }, { "epoch": 0.5726856740584302, "grad_norm": 1.9093661308288574, "learning_rate": 5.700373772485603e-06, "loss": 0.7089, "step": 8135 }, { "epoch": 0.5727560718057022, "grad_norm": 1.6111443042755127, "learning_rate": 5.698805450368902e-06, "loss": 0.7023, "step": 8136 }, { "epoch": 0.5728264695529743, "grad_norm": 2.111361503601074, "learning_rate": 5.697237195902483e-06, "loss": 0.7017, "step": 8137 }, { "epoch": 0.5728968673002464, "grad_norm": 1.9252454042434692, "learning_rate": 5.695669009167884e-06, "loss": 0.6279, "step": 8138 }, { "epoch": 0.5729672650475185, "grad_norm": 1.6173492670059204, "learning_rate": 5.694100890246633e-06, "loss": 0.5496, "step": 8139 }, { "epoch": 0.5730376627947905, "grad_norm": 1.772667646408081, "learning_rate": 5.692532839220261e-06, "loss": 0.8992, "step": 8140 }, { "epoch": 0.5731080605420626, "grad_norm": 1.6319092512130737, "learning_rate": 5.690964856170287e-06, "loss": 0.6369, "step": 8141 }, { "epoch": 0.5731784582893348, "grad_norm": 1.7490017414093018, "learning_rate": 5.689396941178239e-06, "loss": 0.7507, "step": 8142 }, { "epoch": 0.5732488560366068, "grad_norm": 2.2018632888793945, "learning_rate": 5.687829094325631e-06, "loss": 0.6556, "step": 8143 }, { "epoch": 0.5733192537838789, "grad_norm": 1.6492971181869507, "learning_rate": 5.686261315693976e-06, "loss": 0.8279, "step": 8144 }, { "epoch": 0.573389651531151, "grad_norm": 1.5351334810256958, "learning_rate": 5.6846936053647844e-06, "loss": 0.7324, "step": 8145 }, { "epoch": 0.5734600492784231, "grad_norm": 1.7504860162734985, "learning_rate": 5.683125963419565e-06, "loss": 0.7498, "step": 8146 }, { "epoch": 0.5735304470256952, "grad_norm": 1.8726359605789185, "learning_rate": 5.681558389939817e-06, "loss": 0.6442, "step": 8147 }, { "epoch": 0.5736008447729672, "grad_norm": 1.8259636163711548, "learning_rate": 5.6799908850070445e-06, "loss": 0.5939, "step": 8148 }, { "epoch": 0.5736712425202394, "grad_norm": 1.8069688081741333, "learning_rate": 5.67842344870274e-06, "loss": 0.771, "step": 8149 }, { "epoch": 0.5737416402675114, "grad_norm": 3.8866238594055176, "learning_rate": 5.676856081108398e-06, "loss": 0.5725, "step": 8150 }, { "epoch": 0.5738120380147835, "grad_norm": 1.8992180824279785, "learning_rate": 5.675288782305507e-06, "loss": 0.7524, "step": 8151 }, { "epoch": 0.5738824357620557, "grad_norm": 1.753021001815796, "learning_rate": 5.673721552375552e-06, "loss": 0.8048, "step": 8152 }, { "epoch": 0.5739528335093277, "grad_norm": 2.081477403640747, "learning_rate": 5.672154391400012e-06, "loss": 0.668, "step": 8153 }, { "epoch": 0.5740232312565998, "grad_norm": 1.9308956861495972, "learning_rate": 5.670587299460371e-06, "loss": 0.7006, "step": 8154 }, { "epoch": 0.5740936290038718, "grad_norm": 1.707717776298523, "learning_rate": 5.6690202766380984e-06, "loss": 0.6628, "step": 8155 }, { "epoch": 0.574164026751144, "grad_norm": 1.4307575225830078, "learning_rate": 5.6674533230146695e-06, "loss": 0.714, "step": 8156 }, { "epoch": 0.574234424498416, "grad_norm": 1.7731037139892578, "learning_rate": 5.665886438671547e-06, "loss": 0.6488, "step": 8157 }, { "epoch": 0.5743048222456881, "grad_norm": 1.7234880924224854, "learning_rate": 5.664319623690198e-06, "loss": 0.6188, "step": 8158 }, { "epoch": 0.5743752199929603, "grad_norm": 1.6666074991226196, "learning_rate": 5.662752878152079e-06, "loss": 0.7264, "step": 8159 }, { "epoch": 0.5744456177402323, "grad_norm": 1.653846025466919, "learning_rate": 5.6611862021386515e-06, "loss": 0.6061, "step": 8160 }, { "epoch": 0.5745160154875044, "grad_norm": 1.9214038848876953, "learning_rate": 5.659619595731364e-06, "loss": 0.7283, "step": 8161 }, { "epoch": 0.5745864132347764, "grad_norm": 1.7268505096435547, "learning_rate": 5.65805305901167e-06, "loss": 0.5976, "step": 8162 }, { "epoch": 0.5746568109820486, "grad_norm": 1.5392508506774902, "learning_rate": 5.6564865920610105e-06, "loss": 0.6915, "step": 8163 }, { "epoch": 0.5747272087293207, "grad_norm": 1.8991539478302002, "learning_rate": 5.654920194960833e-06, "loss": 0.5965, "step": 8164 }, { "epoch": 0.5747976064765927, "grad_norm": 1.6193293333053589, "learning_rate": 5.6533538677925675e-06, "loss": 0.6019, "step": 8165 }, { "epoch": 0.5748680042238649, "grad_norm": 1.727514386177063, "learning_rate": 5.651787610637658e-06, "loss": 0.6988, "step": 8166 }, { "epoch": 0.5749384019711369, "grad_norm": 1.8360650539398193, "learning_rate": 5.65022142357753e-06, "loss": 0.6597, "step": 8167 }, { "epoch": 0.575008799718409, "grad_norm": 2.250810384750366, "learning_rate": 5.648655306693614e-06, "loss": 0.7075, "step": 8168 }, { "epoch": 0.5750791974656811, "grad_norm": 2.007718324661255, "learning_rate": 5.647089260067332e-06, "loss": 0.7567, "step": 8169 }, { "epoch": 0.5751495952129532, "grad_norm": 1.7931090593338013, "learning_rate": 5.645523283780105e-06, "loss": 0.728, "step": 8170 }, { "epoch": 0.5752199929602253, "grad_norm": 1.8193519115447998, "learning_rate": 5.643957377913347e-06, "loss": 0.6853, "step": 8171 }, { "epoch": 0.5752903907074973, "grad_norm": 1.536608338356018, "learning_rate": 5.642391542548474e-06, "loss": 0.6896, "step": 8172 }, { "epoch": 0.5753607884547695, "grad_norm": 1.6288024187088013, "learning_rate": 5.640825777766895e-06, "loss": 0.6358, "step": 8173 }, { "epoch": 0.5754311862020416, "grad_norm": 1.6683870553970337, "learning_rate": 5.639260083650014e-06, "loss": 0.5808, "step": 8174 }, { "epoch": 0.5755015839493136, "grad_norm": 1.8539376258850098, "learning_rate": 5.6376944602792355e-06, "loss": 0.6702, "step": 8175 }, { "epoch": 0.5755719816965857, "grad_norm": 1.8664599657058716, "learning_rate": 5.636128907735952e-06, "loss": 0.6916, "step": 8176 }, { "epoch": 0.5756423794438578, "grad_norm": 1.9104851484298706, "learning_rate": 5.6345634261015655e-06, "loss": 0.7102, "step": 8177 }, { "epoch": 0.5757127771911299, "grad_norm": 1.6734907627105713, "learning_rate": 5.6329980154574615e-06, "loss": 0.7556, "step": 8178 }, { "epoch": 0.5757831749384019, "grad_norm": 2.050377130508423, "learning_rate": 5.63143267588503e-06, "loss": 0.7605, "step": 8179 }, { "epoch": 0.575853572685674, "grad_norm": 1.6815866231918335, "learning_rate": 5.6298674074656506e-06, "loss": 0.6877, "step": 8180 }, { "epoch": 0.5759239704329462, "grad_norm": 2.0409457683563232, "learning_rate": 5.628302210280708e-06, "loss": 0.7122, "step": 8181 }, { "epoch": 0.5759943681802182, "grad_norm": 2.7393882274627686, "learning_rate": 5.626737084411574e-06, "loss": 0.7278, "step": 8182 }, { "epoch": 0.5760647659274903, "grad_norm": 1.8753958940505981, "learning_rate": 5.625172029939625e-06, "loss": 0.6958, "step": 8183 }, { "epoch": 0.5761351636747624, "grad_norm": 1.871300458908081, "learning_rate": 5.623607046946225e-06, "loss": 0.615, "step": 8184 }, { "epoch": 0.5762055614220345, "grad_norm": 1.6851606369018555, "learning_rate": 5.622042135512743e-06, "loss": 0.6302, "step": 8185 }, { "epoch": 0.5762759591693066, "grad_norm": 1.463715672492981, "learning_rate": 5.620477295720535e-06, "loss": 0.8234, "step": 8186 }, { "epoch": 0.5763463569165787, "grad_norm": 1.8295669555664062, "learning_rate": 5.618912527650965e-06, "loss": 0.7365, "step": 8187 }, { "epoch": 0.5764167546638508, "grad_norm": 2.0169363021850586, "learning_rate": 5.617347831385382e-06, "loss": 0.7153, "step": 8188 }, { "epoch": 0.5764871524111228, "grad_norm": 2.0920462608337402, "learning_rate": 5.615783207005139e-06, "loss": 0.6497, "step": 8189 }, { "epoch": 0.5765575501583949, "grad_norm": 1.847169280052185, "learning_rate": 5.6142186545915785e-06, "loss": 0.777, "step": 8190 }, { "epoch": 0.5766279479056671, "grad_norm": 1.6676913499832153, "learning_rate": 5.612654174226048e-06, "loss": 0.5745, "step": 8191 }, { "epoch": 0.5766983456529391, "grad_norm": 1.6745476722717285, "learning_rate": 5.6110897659898795e-06, "loss": 0.6943, "step": 8192 }, { "epoch": 0.5767687434002112, "grad_norm": 1.8144006729125977, "learning_rate": 5.609525429964416e-06, "loss": 0.7265, "step": 8193 }, { "epoch": 0.5768391411474832, "grad_norm": 2.115290880203247, "learning_rate": 5.607961166230982e-06, "loss": 0.6097, "step": 8194 }, { "epoch": 0.5769095388947554, "grad_norm": 1.7117674350738525, "learning_rate": 5.6063969748709104e-06, "loss": 0.7019, "step": 8195 }, { "epoch": 0.5769799366420274, "grad_norm": 2.22959303855896, "learning_rate": 5.604832855965519e-06, "loss": 0.6614, "step": 8196 }, { "epoch": 0.5770503343892995, "grad_norm": 2.237795114517212, "learning_rate": 5.603268809596133e-06, "loss": 0.6829, "step": 8197 }, { "epoch": 0.5771207321365717, "grad_norm": 1.8944430351257324, "learning_rate": 5.601704835844064e-06, "loss": 0.6586, "step": 8198 }, { "epoch": 0.5771911298838437, "grad_norm": 1.9542096853256226, "learning_rate": 5.60014093479063e-06, "loss": 0.7462, "step": 8199 }, { "epoch": 0.5772615276311158, "grad_norm": 1.633751392364502, "learning_rate": 5.5985771065171355e-06, "loss": 0.6232, "step": 8200 }, { "epoch": 0.5773319253783878, "grad_norm": 1.6146372556686401, "learning_rate": 5.597013351104887e-06, "loss": 0.7112, "step": 8201 }, { "epoch": 0.57740232312566, "grad_norm": 1.949133276939392, "learning_rate": 5.595449668635184e-06, "loss": 0.6251, "step": 8202 }, { "epoch": 0.5774727208729321, "grad_norm": 2.099766492843628, "learning_rate": 5.593886059189326e-06, "loss": 0.6734, "step": 8203 }, { "epoch": 0.5775431186202041, "grad_norm": 2.0622096061706543, "learning_rate": 5.5923225228486025e-06, "loss": 0.5444, "step": 8204 }, { "epoch": 0.5776135163674763, "grad_norm": 1.790793776512146, "learning_rate": 5.590759059694308e-06, "loss": 0.6782, "step": 8205 }, { "epoch": 0.5776839141147483, "grad_norm": 2.0392489433288574, "learning_rate": 5.5891956698077264e-06, "loss": 0.723, "step": 8206 }, { "epoch": 0.5777543118620204, "grad_norm": 1.761608600616455, "learning_rate": 5.5876323532701404e-06, "loss": 0.7583, "step": 8207 }, { "epoch": 0.5778247096092926, "grad_norm": 1.8317463397979736, "learning_rate": 5.586069110162826e-06, "loss": 0.7487, "step": 8208 }, { "epoch": 0.5778951073565646, "grad_norm": 2.83974289894104, "learning_rate": 5.58450594056706e-06, "loss": 0.6731, "step": 8209 }, { "epoch": 0.5779655051038367, "grad_norm": 1.705824613571167, "learning_rate": 5.58294284456411e-06, "loss": 0.7107, "step": 8210 }, { "epoch": 0.5780359028511087, "grad_norm": 1.5219186544418335, "learning_rate": 5.581379822235247e-06, "loss": 0.5744, "step": 8211 }, { "epoch": 0.5781063005983809, "grad_norm": 1.8401975631713867, "learning_rate": 5.579816873661731e-06, "loss": 0.7831, "step": 8212 }, { "epoch": 0.5781766983456529, "grad_norm": 1.881591558456421, "learning_rate": 5.5782539989248226e-06, "loss": 0.6099, "step": 8213 }, { "epoch": 0.578247096092925, "grad_norm": 1.5496925115585327, "learning_rate": 5.576691198105776e-06, "loss": 0.6913, "step": 8214 }, { "epoch": 0.5783174938401971, "grad_norm": 1.7259063720703125, "learning_rate": 5.575128471285844e-06, "loss": 0.7081, "step": 8215 }, { "epoch": 0.5783878915874692, "grad_norm": 1.6139036417007446, "learning_rate": 5.573565818546269e-06, "loss": 0.6537, "step": 8216 }, { "epoch": 0.5784582893347413, "grad_norm": 1.53080153465271, "learning_rate": 5.572003239968304e-06, "loss": 0.6702, "step": 8217 }, { "epoch": 0.5785286870820133, "grad_norm": 1.4133095741271973, "learning_rate": 5.570440735633181e-06, "loss": 0.8067, "step": 8218 }, { "epoch": 0.5785990848292855, "grad_norm": 1.6692250967025757, "learning_rate": 5.568878305622141e-06, "loss": 0.6681, "step": 8219 }, { "epoch": 0.5786694825765576, "grad_norm": 1.6520192623138428, "learning_rate": 5.567315950016412e-06, "loss": 0.632, "step": 8220 }, { "epoch": 0.5787398803238296, "grad_norm": 1.6879892349243164, "learning_rate": 5.565753668897227e-06, "loss": 0.6514, "step": 8221 }, { "epoch": 0.5788102780711017, "grad_norm": 1.7876850366592407, "learning_rate": 5.5641914623458035e-06, "loss": 0.8392, "step": 8222 }, { "epoch": 0.5788806758183738, "grad_norm": 2.0689826011657715, "learning_rate": 5.562629330443368e-06, "loss": 0.799, "step": 8223 }, { "epoch": 0.5789510735656459, "grad_norm": 2.636178970336914, "learning_rate": 5.561067273271136e-06, "loss": 0.7372, "step": 8224 }, { "epoch": 0.579021471312918, "grad_norm": 1.6689369678497314, "learning_rate": 5.559505290910318e-06, "loss": 0.5981, "step": 8225 }, { "epoch": 0.5790918690601901, "grad_norm": 1.7435472011566162, "learning_rate": 5.557943383442129e-06, "loss": 0.743, "step": 8226 }, { "epoch": 0.5791622668074622, "grad_norm": 1.9502640962600708, "learning_rate": 5.556381550947765e-06, "loss": 0.751, "step": 8227 }, { "epoch": 0.5792326645547342, "grad_norm": 1.8088504076004028, "learning_rate": 5.554819793508434e-06, "loss": 0.6631, "step": 8228 }, { "epoch": 0.5793030623020063, "grad_norm": 1.5860257148742676, "learning_rate": 5.553258111205329e-06, "loss": 0.6646, "step": 8229 }, { "epoch": 0.5793734600492785, "grad_norm": 1.5610913038253784, "learning_rate": 5.5516965041196465e-06, "loss": 0.5686, "step": 8230 }, { "epoch": 0.5794438577965505, "grad_norm": 1.8429620265960693, "learning_rate": 5.550134972332571e-06, "loss": 0.618, "step": 8231 }, { "epoch": 0.5795142555438226, "grad_norm": 1.9208455085754395, "learning_rate": 5.548573515925298e-06, "loss": 0.696, "step": 8232 }, { "epoch": 0.5795846532910947, "grad_norm": 1.6332908868789673, "learning_rate": 5.547012134978996e-06, "loss": 0.5376, "step": 8233 }, { "epoch": 0.5796550510383668, "grad_norm": 1.9052255153656006, "learning_rate": 5.545450829574853e-06, "loss": 0.5968, "step": 8234 }, { "epoch": 0.5797254487856388, "grad_norm": 1.512439489364624, "learning_rate": 5.543889599794036e-06, "loss": 0.6695, "step": 8235 }, { "epoch": 0.5797958465329109, "grad_norm": 2.153076648712158, "learning_rate": 5.542328445717721e-06, "loss": 0.7188, "step": 8236 }, { "epoch": 0.5798662442801831, "grad_norm": 1.6548734903335571, "learning_rate": 5.540767367427066e-06, "loss": 0.624, "step": 8237 }, { "epoch": 0.5799366420274551, "grad_norm": 1.8989782333374023, "learning_rate": 5.5392063650032394e-06, "loss": 0.6002, "step": 8238 }, { "epoch": 0.5800070397747272, "grad_norm": 1.8820514678955078, "learning_rate": 5.537645438527397e-06, "loss": 0.6627, "step": 8239 }, { "epoch": 0.5800774375219993, "grad_norm": 1.7206306457519531, "learning_rate": 5.536084588080692e-06, "loss": 0.7226, "step": 8240 }, { "epoch": 0.5801478352692714, "grad_norm": 2.7029783725738525, "learning_rate": 5.534523813744275e-06, "loss": 0.6289, "step": 8241 }, { "epoch": 0.5802182330165435, "grad_norm": 1.6712077856063843, "learning_rate": 5.532963115599293e-06, "loss": 0.6741, "step": 8242 }, { "epoch": 0.5802886307638155, "grad_norm": 1.7420172691345215, "learning_rate": 5.5314024937268844e-06, "loss": 0.613, "step": 8243 }, { "epoch": 0.5803590285110877, "grad_norm": 2.12302827835083, "learning_rate": 5.529841948208194e-06, "loss": 0.5883, "step": 8244 }, { "epoch": 0.5804294262583597, "grad_norm": 1.9827967882156372, "learning_rate": 5.528281479124351e-06, "loss": 0.7268, "step": 8245 }, { "epoch": 0.5804998240056318, "grad_norm": 1.5134717226028442, "learning_rate": 5.526721086556486e-06, "loss": 0.7075, "step": 8246 }, { "epoch": 0.580570221752904, "grad_norm": 2.0789740085601807, "learning_rate": 5.5251607705857265e-06, "loss": 0.7042, "step": 8247 }, { "epoch": 0.580640619500176, "grad_norm": 2.0504043102264404, "learning_rate": 5.523600531293195e-06, "loss": 0.6545, "step": 8248 }, { "epoch": 0.5807110172474481, "grad_norm": 1.880976915359497, "learning_rate": 5.522040368760006e-06, "loss": 0.7902, "step": 8249 }, { "epoch": 0.5807814149947201, "grad_norm": 2.832029342651367, "learning_rate": 5.5204802830672795e-06, "loss": 0.7164, "step": 8250 }, { "epoch": 0.5808518127419923, "grad_norm": 1.7389121055603027, "learning_rate": 5.518920274296122e-06, "loss": 0.7294, "step": 8251 }, { "epoch": 0.5809222104892643, "grad_norm": 1.8672235012054443, "learning_rate": 5.5173603425276425e-06, "loss": 0.6876, "step": 8252 }, { "epoch": 0.5809926082365364, "grad_norm": 1.8448493480682373, "learning_rate": 5.51580048784294e-06, "loss": 0.5973, "step": 8253 }, { "epoch": 0.5810630059838086, "grad_norm": 3.606563091278076, "learning_rate": 5.5142407103231144e-06, "loss": 0.5375, "step": 8254 }, { "epoch": 0.5811334037310806, "grad_norm": 1.9489216804504395, "learning_rate": 5.5126810100492585e-06, "loss": 0.6647, "step": 8255 }, { "epoch": 0.5812038014783527, "grad_norm": 2.0137863159179688, "learning_rate": 5.511121387102467e-06, "loss": 0.7097, "step": 8256 }, { "epoch": 0.5812741992256247, "grad_norm": 1.7851628065109253, "learning_rate": 5.5095618415638196e-06, "loss": 0.794, "step": 8257 }, { "epoch": 0.5813445969728969, "grad_norm": 1.8300530910491943, "learning_rate": 5.508002373514405e-06, "loss": 0.6512, "step": 8258 }, { "epoch": 0.581414994720169, "grad_norm": 1.8893623352050781, "learning_rate": 5.506442983035297e-06, "loss": 0.651, "step": 8259 }, { "epoch": 0.581485392467441, "grad_norm": 1.6678426265716553, "learning_rate": 5.504883670207571e-06, "loss": 0.6836, "step": 8260 }, { "epoch": 0.5815557902147132, "grad_norm": 2.6464734077453613, "learning_rate": 5.5033244351122955e-06, "loss": 0.6425, "step": 8261 }, { "epoch": 0.5816261879619852, "grad_norm": 1.6643173694610596, "learning_rate": 5.50176527783054e-06, "loss": 0.6202, "step": 8262 }, { "epoch": 0.5816965857092573, "grad_norm": 1.9622917175292969, "learning_rate": 5.500206198443363e-06, "loss": 0.5931, "step": 8263 }, { "epoch": 0.5817669834565294, "grad_norm": 1.7560551166534424, "learning_rate": 5.498647197031827e-06, "loss": 0.6735, "step": 8264 }, { "epoch": 0.5818373812038015, "grad_norm": 1.7820117473602295, "learning_rate": 5.497088273676979e-06, "loss": 0.7165, "step": 8265 }, { "epoch": 0.5819077789510736, "grad_norm": 2.834916591644287, "learning_rate": 5.495529428459876e-06, "loss": 0.6063, "step": 8266 }, { "epoch": 0.5819781766983456, "grad_norm": 1.683653473854065, "learning_rate": 5.493970661461557e-06, "loss": 0.7386, "step": 8267 }, { "epoch": 0.5820485744456178, "grad_norm": 1.6850310564041138, "learning_rate": 5.492411972763071e-06, "loss": 0.7243, "step": 8268 }, { "epoch": 0.5821189721928899, "grad_norm": 2.95345139503479, "learning_rate": 5.490853362445451e-06, "loss": 0.6714, "step": 8269 }, { "epoch": 0.5821893699401619, "grad_norm": 1.90186607837677, "learning_rate": 5.489294830589731e-06, "loss": 0.7088, "step": 8270 }, { "epoch": 0.582259767687434, "grad_norm": 1.8064552545547485, "learning_rate": 5.48773637727694e-06, "loss": 0.7998, "step": 8271 }, { "epoch": 0.5823301654347061, "grad_norm": 1.6982916593551636, "learning_rate": 5.486178002588103e-06, "loss": 0.7776, "step": 8272 }, { "epoch": 0.5824005631819782, "grad_norm": 1.8335176706314087, "learning_rate": 5.484619706604246e-06, "loss": 0.6774, "step": 8273 }, { "epoch": 0.5824709609292502, "grad_norm": 2.3546249866485596, "learning_rate": 5.4830614894063814e-06, "loss": 0.7384, "step": 8274 }, { "epoch": 0.5825413586765223, "grad_norm": 1.7954144477844238, "learning_rate": 5.481503351075525e-06, "loss": 0.5878, "step": 8275 }, { "epoch": 0.5826117564237945, "grad_norm": 1.8957939147949219, "learning_rate": 5.4799452916926814e-06, "loss": 0.6293, "step": 8276 }, { "epoch": 0.5826821541710665, "grad_norm": 1.9578779935836792, "learning_rate": 5.478387311338863e-06, "loss": 0.7058, "step": 8277 }, { "epoch": 0.5827525519183386, "grad_norm": 1.6520581245422363, "learning_rate": 5.476829410095063e-06, "loss": 0.6446, "step": 8278 }, { "epoch": 0.5828229496656107, "grad_norm": 1.7950501441955566, "learning_rate": 5.475271588042284e-06, "loss": 0.7018, "step": 8279 }, { "epoch": 0.5828933474128828, "grad_norm": 1.6770246028900146, "learning_rate": 5.473713845261514e-06, "loss": 0.7813, "step": 8280 }, { "epoch": 0.5829637451601549, "grad_norm": 1.7638682126998901, "learning_rate": 5.472156181833745e-06, "loss": 0.6777, "step": 8281 }, { "epoch": 0.5830341429074269, "grad_norm": 2.426767349243164, "learning_rate": 5.4705985978399565e-06, "loss": 0.7514, "step": 8282 }, { "epoch": 0.5831045406546991, "grad_norm": 1.9168076515197754, "learning_rate": 5.469041093361137e-06, "loss": 0.6855, "step": 8283 }, { "epoch": 0.5831749384019711, "grad_norm": 1.7565991878509521, "learning_rate": 5.467483668478254e-06, "loss": 0.6517, "step": 8284 }, { "epoch": 0.5832453361492432, "grad_norm": 1.559098720550537, "learning_rate": 5.465926323272284e-06, "loss": 0.6492, "step": 8285 }, { "epoch": 0.5833157338965154, "grad_norm": 2.20475172996521, "learning_rate": 5.464369057824194e-06, "loss": 0.7991, "step": 8286 }, { "epoch": 0.5833861316437874, "grad_norm": 2.130322217941284, "learning_rate": 5.4628118722149485e-06, "loss": 0.7193, "step": 8287 }, { "epoch": 0.5834565293910595, "grad_norm": 1.7306833267211914, "learning_rate": 5.461254766525503e-06, "loss": 0.5363, "step": 8288 }, { "epoch": 0.5835269271383315, "grad_norm": 1.6801269054412842, "learning_rate": 5.459697740836818e-06, "loss": 0.5975, "step": 8289 }, { "epoch": 0.5835973248856037, "grad_norm": 1.7997920513153076, "learning_rate": 5.458140795229842e-06, "loss": 0.7007, "step": 8290 }, { "epoch": 0.5836677226328757, "grad_norm": 1.9618583917617798, "learning_rate": 5.456583929785523e-06, "loss": 0.7078, "step": 8291 }, { "epoch": 0.5837381203801478, "grad_norm": 1.5579930543899536, "learning_rate": 5.455027144584802e-06, "loss": 0.635, "step": 8292 }, { "epoch": 0.58380851812742, "grad_norm": 1.6427663564682007, "learning_rate": 5.4534704397086206e-06, "loss": 0.6648, "step": 8293 }, { "epoch": 0.583878915874692, "grad_norm": 1.920552372932434, "learning_rate": 5.451913815237909e-06, "loss": 0.7257, "step": 8294 }, { "epoch": 0.5839493136219641, "grad_norm": 2.1827950477600098, "learning_rate": 5.4503572712536026e-06, "loss": 0.7802, "step": 8295 }, { "epoch": 0.5840197113692361, "grad_norm": 2.3113808631896973, "learning_rate": 5.448800807836624e-06, "loss": 0.75, "step": 8296 }, { "epoch": 0.5840901091165083, "grad_norm": 1.7168505191802979, "learning_rate": 5.447244425067897e-06, "loss": 0.5355, "step": 8297 }, { "epoch": 0.5841605068637804, "grad_norm": 1.7230767011642456, "learning_rate": 5.445688123028338e-06, "loss": 0.7232, "step": 8298 }, { "epoch": 0.5842309046110524, "grad_norm": 1.8321757316589355, "learning_rate": 5.444131901798863e-06, "loss": 0.6173, "step": 8299 }, { "epoch": 0.5843013023583246, "grad_norm": 2.546560287475586, "learning_rate": 5.442575761460375e-06, "loss": 0.6552, "step": 8300 }, { "epoch": 0.5843717001055966, "grad_norm": 1.8300697803497314, "learning_rate": 5.4410197020937875e-06, "loss": 0.6915, "step": 8301 }, { "epoch": 0.5844420978528687, "grad_norm": 1.7439712285995483, "learning_rate": 5.439463723779996e-06, "loss": 0.6637, "step": 8302 }, { "epoch": 0.5845124956001408, "grad_norm": 1.6424226760864258, "learning_rate": 5.437907826599901e-06, "loss": 0.7049, "step": 8303 }, { "epoch": 0.5845828933474129, "grad_norm": 1.688897728919983, "learning_rate": 5.436352010634391e-06, "loss": 0.7345, "step": 8304 }, { "epoch": 0.584653291094685, "grad_norm": 1.583640694618225, "learning_rate": 5.434796275964357e-06, "loss": 0.6326, "step": 8305 }, { "epoch": 0.584723688841957, "grad_norm": 1.703668475151062, "learning_rate": 5.43324062267068e-06, "loss": 0.8158, "step": 8306 }, { "epoch": 0.5847940865892292, "grad_norm": 1.9501432180404663, "learning_rate": 5.431685050834244e-06, "loss": 0.6853, "step": 8307 }, { "epoch": 0.5848644843365012, "grad_norm": 1.7676399946212769, "learning_rate": 5.4301295605359215e-06, "loss": 0.7202, "step": 8308 }, { "epoch": 0.5849348820837733, "grad_norm": 1.8917163610458374, "learning_rate": 5.4285741518565866e-06, "loss": 0.7113, "step": 8309 }, { "epoch": 0.5850052798310454, "grad_norm": 1.583044409751892, "learning_rate": 5.427018824877103e-06, "loss": 0.5963, "step": 8310 }, { "epoch": 0.5850756775783175, "grad_norm": 1.8377320766448975, "learning_rate": 5.425463579678337e-06, "loss": 0.6595, "step": 8311 }, { "epoch": 0.5851460753255896, "grad_norm": 3.047484874725342, "learning_rate": 5.4239084163411416e-06, "loss": 0.6324, "step": 8312 }, { "epoch": 0.5852164730728616, "grad_norm": 1.597550868988037, "learning_rate": 5.422353334946379e-06, "loss": 0.6371, "step": 8313 }, { "epoch": 0.5852868708201338, "grad_norm": 1.7899389266967773, "learning_rate": 5.420798335574893e-06, "loss": 0.6428, "step": 8314 }, { "epoch": 0.5853572685674059, "grad_norm": 1.8575233221054077, "learning_rate": 5.419243418307534e-06, "loss": 0.6771, "step": 8315 }, { "epoch": 0.5854276663146779, "grad_norm": 2.0329737663269043, "learning_rate": 5.417688583225139e-06, "loss": 0.7, "step": 8316 }, { "epoch": 0.58549806406195, "grad_norm": 2.4444756507873535, "learning_rate": 5.416133830408551e-06, "loss": 0.6492, "step": 8317 }, { "epoch": 0.5855684618092221, "grad_norm": 3.518667221069336, "learning_rate": 5.414579159938595e-06, "loss": 0.7175, "step": 8318 }, { "epoch": 0.5856388595564942, "grad_norm": 1.802787184715271, "learning_rate": 5.4130245718961075e-06, "loss": 0.6816, "step": 8319 }, { "epoch": 0.5857092573037663, "grad_norm": 2.456324338912964, "learning_rate": 5.411470066361908e-06, "loss": 0.663, "step": 8320 }, { "epoch": 0.5857796550510384, "grad_norm": 1.606592059135437, "learning_rate": 5.4099156434168204e-06, "loss": 0.6908, "step": 8321 }, { "epoch": 0.5858500527983105, "grad_norm": 2.7831053733825684, "learning_rate": 5.408361303141656e-06, "loss": 0.6508, "step": 8322 }, { "epoch": 0.5859204505455825, "grad_norm": 1.7652250528335571, "learning_rate": 5.406807045617229e-06, "loss": 0.6995, "step": 8323 }, { "epoch": 0.5859908482928546, "grad_norm": 1.7334200143814087, "learning_rate": 5.4052528709243485e-06, "loss": 0.6014, "step": 8324 }, { "epoch": 0.5860612460401268, "grad_norm": 1.6266498565673828, "learning_rate": 5.403698779143814e-06, "loss": 0.6474, "step": 8325 }, { "epoch": 0.5861316437873988, "grad_norm": 2.0051681995391846, "learning_rate": 5.402144770356428e-06, "loss": 0.5822, "step": 8326 }, { "epoch": 0.5862020415346709, "grad_norm": 1.6933257579803467, "learning_rate": 5.400590844642978e-06, "loss": 0.7271, "step": 8327 }, { "epoch": 0.586272439281943, "grad_norm": 1.6895219087600708, "learning_rate": 5.399037002084265e-06, "loss": 0.6039, "step": 8328 }, { "epoch": 0.5863428370292151, "grad_norm": 1.942656397819519, "learning_rate": 5.397483242761063e-06, "loss": 0.6492, "step": 8329 }, { "epoch": 0.5864132347764871, "grad_norm": 1.53084135055542, "learning_rate": 5.395929566754161e-06, "loss": 0.737, "step": 8330 }, { "epoch": 0.5864836325237592, "grad_norm": 2.0106735229492188, "learning_rate": 5.3943759741443326e-06, "loss": 0.5946, "step": 8331 }, { "epoch": 0.5865540302710314, "grad_norm": 1.9458675384521484, "learning_rate": 5.392822465012353e-06, "loss": 0.7821, "step": 8332 }, { "epoch": 0.5866244280183034, "grad_norm": 1.7307686805725098, "learning_rate": 5.391269039438985e-06, "loss": 0.6662, "step": 8333 }, { "epoch": 0.5866948257655755, "grad_norm": 1.881355881690979, "learning_rate": 5.389715697505002e-06, "loss": 0.7322, "step": 8334 }, { "epoch": 0.5867652235128475, "grad_norm": 1.8665058612823486, "learning_rate": 5.388162439291153e-06, "loss": 0.7478, "step": 8335 }, { "epoch": 0.5868356212601197, "grad_norm": 1.7925149202346802, "learning_rate": 5.386609264878202e-06, "loss": 0.7535, "step": 8336 }, { "epoch": 0.5869060190073918, "grad_norm": 1.7513177394866943, "learning_rate": 5.3850561743468955e-06, "loss": 0.7366, "step": 8337 }, { "epoch": 0.5869764167546638, "grad_norm": 2.0716114044189453, "learning_rate": 5.383503167777982e-06, "loss": 0.7417, "step": 8338 }, { "epoch": 0.587046814501936, "grad_norm": 2.0462141036987305, "learning_rate": 5.3819502452522e-06, "loss": 0.6394, "step": 8339 }, { "epoch": 0.587117212249208, "grad_norm": 1.6991044282913208, "learning_rate": 5.380397406850294e-06, "loss": 0.7161, "step": 8340 }, { "epoch": 0.5871876099964801, "grad_norm": 1.7154700756072998, "learning_rate": 5.37884465265299e-06, "loss": 0.7809, "step": 8341 }, { "epoch": 0.5872580077437523, "grad_norm": 1.9495103359222412, "learning_rate": 5.3772919827410235e-06, "loss": 0.6476, "step": 8342 }, { "epoch": 0.5873284054910243, "grad_norm": 1.598648190498352, "learning_rate": 5.375739397195115e-06, "loss": 0.6669, "step": 8343 }, { "epoch": 0.5873988032382964, "grad_norm": 2.0440189838409424, "learning_rate": 5.3741868960959875e-06, "loss": 0.5774, "step": 8344 }, { "epoch": 0.5874692009855684, "grad_norm": 1.9413020610809326, "learning_rate": 5.372634479524353e-06, "loss": 0.7354, "step": 8345 }, { "epoch": 0.5875395987328406, "grad_norm": 1.7521178722381592, "learning_rate": 5.371082147560929e-06, "loss": 0.7862, "step": 8346 }, { "epoch": 0.5876099964801126, "grad_norm": 2.103842258453369, "learning_rate": 5.369529900286416e-06, "loss": 0.6207, "step": 8347 }, { "epoch": 0.5876803942273847, "grad_norm": 1.886500358581543, "learning_rate": 5.3679777377815236e-06, "loss": 0.5796, "step": 8348 }, { "epoch": 0.5877507919746568, "grad_norm": 1.7851455211639404, "learning_rate": 5.366425660126944e-06, "loss": 0.6277, "step": 8349 }, { "epoch": 0.5878211897219289, "grad_norm": 2.5376858711242676, "learning_rate": 5.364873667403376e-06, "loss": 0.6055, "step": 8350 }, { "epoch": 0.587891587469201, "grad_norm": 1.8392527103424072, "learning_rate": 5.363321759691504e-06, "loss": 0.6695, "step": 8351 }, { "epoch": 0.587961985216473, "grad_norm": 2.248911142349243, "learning_rate": 5.361769937072019e-06, "loss": 0.6314, "step": 8352 }, { "epoch": 0.5880323829637452, "grad_norm": 1.6114343404769897, "learning_rate": 5.360218199625596e-06, "loss": 0.7296, "step": 8353 }, { "epoch": 0.5881027807110173, "grad_norm": 1.7467126846313477, "learning_rate": 5.358666547432917e-06, "loss": 0.7031, "step": 8354 }, { "epoch": 0.5881731784582893, "grad_norm": 1.914749264717102, "learning_rate": 5.357114980574648e-06, "loss": 0.717, "step": 8355 }, { "epoch": 0.5882435762055614, "grad_norm": 1.6608506441116333, "learning_rate": 5.355563499131462e-06, "loss": 0.7272, "step": 8356 }, { "epoch": 0.5883139739528335, "grad_norm": 1.901599407196045, "learning_rate": 5.354012103184014e-06, "loss": 0.6925, "step": 8357 }, { "epoch": 0.5883843717001056, "grad_norm": 1.4379565715789795, "learning_rate": 5.352460792812971e-06, "loss": 0.7301, "step": 8358 }, { "epoch": 0.5884547694473777, "grad_norm": 1.6602476835250854, "learning_rate": 5.3509095680989824e-06, "loss": 0.6346, "step": 8359 }, { "epoch": 0.5885251671946498, "grad_norm": 1.9433786869049072, "learning_rate": 5.349358429122701e-06, "loss": 0.717, "step": 8360 }, { "epoch": 0.5885955649419219, "grad_norm": 1.727460503578186, "learning_rate": 5.347807375964767e-06, "loss": 0.5063, "step": 8361 }, { "epoch": 0.5886659626891939, "grad_norm": 1.9968005418777466, "learning_rate": 5.346256408705826e-06, "loss": 0.6714, "step": 8362 }, { "epoch": 0.588736360436466, "grad_norm": 1.8546010255813599, "learning_rate": 5.344705527426509e-06, "loss": 0.5155, "step": 8363 }, { "epoch": 0.5888067581837381, "grad_norm": 1.7824926376342773, "learning_rate": 5.343154732207454e-06, "loss": 0.6354, "step": 8364 }, { "epoch": 0.5888771559310102, "grad_norm": 1.6552598476409912, "learning_rate": 5.341604023129283e-06, "loss": 0.7075, "step": 8365 }, { "epoch": 0.5889475536782823, "grad_norm": 1.9904884099960327, "learning_rate": 5.340053400272623e-06, "loss": 0.6023, "step": 8366 }, { "epoch": 0.5890179514255544, "grad_norm": 1.6443607807159424, "learning_rate": 5.338502863718088e-06, "loss": 0.6593, "step": 8367 }, { "epoch": 0.5890883491728265, "grad_norm": 2.186485528945923, "learning_rate": 5.336952413546295e-06, "loss": 0.7277, "step": 8368 }, { "epoch": 0.5891587469200985, "grad_norm": 1.7671834230422974, "learning_rate": 5.335402049837849e-06, "loss": 0.6964, "step": 8369 }, { "epoch": 0.5892291446673706, "grad_norm": 1.758019208908081, "learning_rate": 5.333851772673361e-06, "loss": 0.6913, "step": 8370 }, { "epoch": 0.5892995424146428, "grad_norm": 1.7682162523269653, "learning_rate": 5.332301582133429e-06, "loss": 0.643, "step": 8371 }, { "epoch": 0.5893699401619148, "grad_norm": 1.755088210105896, "learning_rate": 5.330751478298644e-06, "loss": 0.5409, "step": 8372 }, { "epoch": 0.5894403379091869, "grad_norm": 1.946739673614502, "learning_rate": 5.329201461249607e-06, "loss": 0.6457, "step": 8373 }, { "epoch": 0.589510735656459, "grad_norm": 1.7998157739639282, "learning_rate": 5.327651531066894e-06, "loss": 0.7217, "step": 8374 }, { "epoch": 0.5895811334037311, "grad_norm": 1.7877662181854248, "learning_rate": 5.3261016878310965e-06, "loss": 0.7874, "step": 8375 }, { "epoch": 0.5896515311510032, "grad_norm": 2.0926873683929443, "learning_rate": 5.324551931622786e-06, "loss": 0.6781, "step": 8376 }, { "epoch": 0.5897219288982752, "grad_norm": 1.5359621047973633, "learning_rate": 5.323002262522539e-06, "loss": 0.6583, "step": 8377 }, { "epoch": 0.5897923266455474, "grad_norm": 2.2465991973876953, "learning_rate": 5.321452680610922e-06, "loss": 0.7219, "step": 8378 }, { "epoch": 0.5898627243928194, "grad_norm": 1.6470400094985962, "learning_rate": 5.319903185968503e-06, "loss": 0.5737, "step": 8379 }, { "epoch": 0.5899331221400915, "grad_norm": 1.7321155071258545, "learning_rate": 5.318353778675835e-06, "loss": 0.6591, "step": 8380 }, { "epoch": 0.5900035198873637, "grad_norm": 2.0315535068511963, "learning_rate": 5.316804458813481e-06, "loss": 0.6546, "step": 8381 }, { "epoch": 0.5900739176346357, "grad_norm": 2.011564016342163, "learning_rate": 5.315255226461984e-06, "loss": 0.761, "step": 8382 }, { "epoch": 0.5901443153819078, "grad_norm": 1.582651138305664, "learning_rate": 5.313706081701897e-06, "loss": 0.6099, "step": 8383 }, { "epoch": 0.5902147131291798, "grad_norm": 2.2776167392730713, "learning_rate": 5.312157024613755e-06, "loss": 0.6998, "step": 8384 }, { "epoch": 0.590285110876452, "grad_norm": 1.8175209760665894, "learning_rate": 5.3106080552781e-06, "loss": 0.6237, "step": 8385 }, { "epoch": 0.590355508623724, "grad_norm": 1.7746756076812744, "learning_rate": 5.30905917377546e-06, "loss": 0.6373, "step": 8386 }, { "epoch": 0.5904259063709961, "grad_norm": 1.644284963607788, "learning_rate": 5.307510380186367e-06, "loss": 0.6058, "step": 8387 }, { "epoch": 0.5904963041182683, "grad_norm": 1.7496849298477173, "learning_rate": 5.305961674591342e-06, "loss": 0.6745, "step": 8388 }, { "epoch": 0.5905667018655403, "grad_norm": 2.0633082389831543, "learning_rate": 5.304413057070904e-06, "loss": 0.6459, "step": 8389 }, { "epoch": 0.5906370996128124, "grad_norm": 2.186246156692505, "learning_rate": 5.3028645277055635e-06, "loss": 0.681, "step": 8390 }, { "epoch": 0.5907074973600844, "grad_norm": 1.8135716915130615, "learning_rate": 5.301316086575838e-06, "loss": 0.6653, "step": 8391 }, { "epoch": 0.5907778951073566, "grad_norm": 1.8409937620162964, "learning_rate": 5.299767733762223e-06, "loss": 0.6883, "step": 8392 }, { "epoch": 0.5908482928546287, "grad_norm": 1.7654578685760498, "learning_rate": 5.2982194693452275e-06, "loss": 0.637, "step": 8393 }, { "epoch": 0.5909186906019007, "grad_norm": 1.6612542867660522, "learning_rate": 5.29667129340534e-06, "loss": 0.6721, "step": 8394 }, { "epoch": 0.5909890883491729, "grad_norm": 1.691361427307129, "learning_rate": 5.295123206023057e-06, "loss": 0.6998, "step": 8395 }, { "epoch": 0.5910594860964449, "grad_norm": 3.454789876937866, "learning_rate": 5.293575207278859e-06, "loss": 0.5624, "step": 8396 }, { "epoch": 0.591129883843717, "grad_norm": 2.2487659454345703, "learning_rate": 5.2920272972532355e-06, "loss": 0.7168, "step": 8397 }, { "epoch": 0.5912002815909891, "grad_norm": 1.8641632795333862, "learning_rate": 5.290479476026657e-06, "loss": 0.7155, "step": 8398 }, { "epoch": 0.5912706793382612, "grad_norm": 1.8380218744277954, "learning_rate": 5.288931743679601e-06, "loss": 0.6472, "step": 8399 }, { "epoch": 0.5913410770855333, "grad_norm": 1.7818262577056885, "learning_rate": 5.287384100292531e-06, "loss": 0.8391, "step": 8400 }, { "epoch": 0.5914114748328053, "grad_norm": 1.9005120992660522, "learning_rate": 5.285836545945915e-06, "loss": 0.6727, "step": 8401 }, { "epoch": 0.5914818725800775, "grad_norm": 2.0661709308624268, "learning_rate": 5.284289080720206e-06, "loss": 0.6657, "step": 8402 }, { "epoch": 0.5915522703273495, "grad_norm": 1.7281635999679565, "learning_rate": 5.282741704695865e-06, "loss": 0.6936, "step": 8403 }, { "epoch": 0.5916226680746216, "grad_norm": 1.7442364692687988, "learning_rate": 5.281194417953335e-06, "loss": 0.6952, "step": 8404 }, { "epoch": 0.5916930658218937, "grad_norm": 2.2027995586395264, "learning_rate": 5.279647220573067e-06, "loss": 0.7173, "step": 8405 }, { "epoch": 0.5917634635691658, "grad_norm": 1.418886423110962, "learning_rate": 5.278100112635496e-06, "loss": 0.6662, "step": 8406 }, { "epoch": 0.5918338613164379, "grad_norm": 2.9493117332458496, "learning_rate": 5.27655309422106e-06, "loss": 0.758, "step": 8407 }, { "epoch": 0.5919042590637099, "grad_norm": 1.6099900007247925, "learning_rate": 5.275006165410187e-06, "loss": 0.7704, "step": 8408 }, { "epoch": 0.591974656810982, "grad_norm": 2.3151681423187256, "learning_rate": 5.27345932628331e-06, "loss": 0.6545, "step": 8409 }, { "epoch": 0.5920450545582542, "grad_norm": 2.0567710399627686, "learning_rate": 5.2719125769208436e-06, "loss": 0.69, "step": 8410 }, { "epoch": 0.5921154523055262, "grad_norm": 2.069877862930298, "learning_rate": 5.270365917403209e-06, "loss": 0.7384, "step": 8411 }, { "epoch": 0.5921858500527983, "grad_norm": 1.6979007720947266, "learning_rate": 5.268819347810816e-06, "loss": 0.6329, "step": 8412 }, { "epoch": 0.5922562478000704, "grad_norm": 1.5977733135223389, "learning_rate": 5.267272868224074e-06, "loss": 0.5486, "step": 8413 }, { "epoch": 0.5923266455473425, "grad_norm": 1.4649254083633423, "learning_rate": 5.265726478723381e-06, "loss": 0.6736, "step": 8414 }, { "epoch": 0.5923970432946146, "grad_norm": 1.739910364151001, "learning_rate": 5.264180179389144e-06, "loss": 0.6113, "step": 8415 }, { "epoch": 0.5924674410418866, "grad_norm": 1.9360419511795044, "learning_rate": 5.262633970301748e-06, "loss": 0.6817, "step": 8416 }, { "epoch": 0.5925378387891588, "grad_norm": 1.6822668313980103, "learning_rate": 5.261087851541588e-06, "loss": 0.6684, "step": 8417 }, { "epoch": 0.5926082365364308, "grad_norm": 1.6639292240142822, "learning_rate": 5.259541823189044e-06, "loss": 0.6621, "step": 8418 }, { "epoch": 0.5926786342837029, "grad_norm": 1.8495620489120483, "learning_rate": 5.257995885324497e-06, "loss": 0.8016, "step": 8419 }, { "epoch": 0.592749032030975, "grad_norm": 2.0214755535125732, "learning_rate": 5.25645003802832e-06, "loss": 0.6894, "step": 8420 }, { "epoch": 0.5928194297782471, "grad_norm": 2.000746488571167, "learning_rate": 5.254904281380888e-06, "loss": 0.7174, "step": 8421 }, { "epoch": 0.5928898275255192, "grad_norm": 1.8237947225570679, "learning_rate": 5.25335861546256e-06, "loss": 0.6055, "step": 8422 }, { "epoch": 0.5929602252727912, "grad_norm": 1.8719887733459473, "learning_rate": 5.251813040353698e-06, "loss": 0.6519, "step": 8423 }, { "epoch": 0.5930306230200634, "grad_norm": 1.995680332183838, "learning_rate": 5.250267556134665e-06, "loss": 0.7133, "step": 8424 }, { "epoch": 0.5931010207673354, "grad_norm": 1.987311840057373, "learning_rate": 5.248722162885801e-06, "loss": 0.7367, "step": 8425 }, { "epoch": 0.5931714185146075, "grad_norm": 2.1546270847320557, "learning_rate": 5.24717686068746e-06, "loss": 0.6577, "step": 8426 }, { "epoch": 0.5932418162618797, "grad_norm": 1.9644157886505127, "learning_rate": 5.2456316496199825e-06, "loss": 0.5863, "step": 8427 }, { "epoch": 0.5933122140091517, "grad_norm": 1.6279038190841675, "learning_rate": 5.244086529763704e-06, "loss": 0.7078, "step": 8428 }, { "epoch": 0.5933826117564238, "grad_norm": 1.6573389768600464, "learning_rate": 5.242541501198954e-06, "loss": 0.7749, "step": 8429 }, { "epoch": 0.5934530095036958, "grad_norm": 1.94803786277771, "learning_rate": 5.240996564006068e-06, "loss": 0.6791, "step": 8430 }, { "epoch": 0.593523407250968, "grad_norm": 1.7930705547332764, "learning_rate": 5.2394517182653584e-06, "loss": 0.7573, "step": 8431 }, { "epoch": 0.5935938049982401, "grad_norm": 2.0571398735046387, "learning_rate": 5.237906964057153e-06, "loss": 0.7046, "step": 8432 }, { "epoch": 0.5936642027455121, "grad_norm": 1.5812034606933594, "learning_rate": 5.236362301461757e-06, "loss": 0.5776, "step": 8433 }, { "epoch": 0.5937346004927843, "grad_norm": 1.9026424884796143, "learning_rate": 5.234817730559485e-06, "loss": 0.7376, "step": 8434 }, { "epoch": 0.5938049982400563, "grad_norm": 2.0452849864959717, "learning_rate": 5.233273251430635e-06, "loss": 0.5923, "step": 8435 }, { "epoch": 0.5938753959873284, "grad_norm": 1.7293288707733154, "learning_rate": 5.231728864155513e-06, "loss": 0.7222, "step": 8436 }, { "epoch": 0.5939457937346005, "grad_norm": 2.533982753753662, "learning_rate": 5.230184568814403e-06, "loss": 0.6477, "step": 8437 }, { "epoch": 0.5940161914818726, "grad_norm": 1.5526905059814453, "learning_rate": 5.228640365487605e-06, "loss": 0.6949, "step": 8438 }, { "epoch": 0.5940865892291447, "grad_norm": 1.5807985067367554, "learning_rate": 5.227096254255396e-06, "loss": 0.6698, "step": 8439 }, { "epoch": 0.5941569869764167, "grad_norm": 1.56657874584198, "learning_rate": 5.225552235198059e-06, "loss": 0.6564, "step": 8440 }, { "epoch": 0.5942273847236889, "grad_norm": 2.1229159832000732, "learning_rate": 5.224008308395867e-06, "loss": 0.6773, "step": 8441 }, { "epoch": 0.5942977824709609, "grad_norm": 1.6430245637893677, "learning_rate": 5.222464473929095e-06, "loss": 0.6343, "step": 8442 }, { "epoch": 0.594368180218233, "grad_norm": 1.627137303352356, "learning_rate": 5.2209207318780015e-06, "loss": 0.6773, "step": 8443 }, { "epoch": 0.5944385779655051, "grad_norm": 1.7977741956710815, "learning_rate": 5.2193770823228526e-06, "loss": 0.6276, "step": 8444 }, { "epoch": 0.5945089757127772, "grad_norm": 1.5045078992843628, "learning_rate": 5.2178335253439e-06, "loss": 0.6274, "step": 8445 }, { "epoch": 0.5945793734600493, "grad_norm": 2.0456531047821045, "learning_rate": 5.216290061021398e-06, "loss": 0.721, "step": 8446 }, { "epoch": 0.5946497712073213, "grad_norm": 1.6100322008132935, "learning_rate": 5.214746689435589e-06, "loss": 0.7668, "step": 8447 }, { "epoch": 0.5947201689545935, "grad_norm": 1.759446620941162, "learning_rate": 5.213203410666719e-06, "loss": 0.722, "step": 8448 }, { "epoch": 0.5947905667018656, "grad_norm": 1.530649185180664, "learning_rate": 5.21166022479502e-06, "loss": 0.5748, "step": 8449 }, { "epoch": 0.5948609644491376, "grad_norm": 1.52797532081604, "learning_rate": 5.210117131900727e-06, "loss": 0.6511, "step": 8450 }, { "epoch": 0.5949313621964097, "grad_norm": 1.9313249588012695, "learning_rate": 5.208574132064064e-06, "loss": 0.6941, "step": 8451 }, { "epoch": 0.5950017599436818, "grad_norm": 1.6650738716125488, "learning_rate": 5.207031225365256e-06, "loss": 0.6766, "step": 8452 }, { "epoch": 0.5950721576909539, "grad_norm": 1.8308238983154297, "learning_rate": 5.205488411884516e-06, "loss": 0.7326, "step": 8453 }, { "epoch": 0.595142555438226, "grad_norm": 1.9926059246063232, "learning_rate": 5.203945691702061e-06, "loss": 0.7732, "step": 8454 }, { "epoch": 0.595212953185498, "grad_norm": 1.7136894464492798, "learning_rate": 5.202403064898094e-06, "loss": 0.6819, "step": 8455 }, { "epoch": 0.5952833509327702, "grad_norm": 1.59889554977417, "learning_rate": 5.200860531552822e-06, "loss": 0.7941, "step": 8456 }, { "epoch": 0.5953537486800422, "grad_norm": 1.6473182439804077, "learning_rate": 5.199318091746438e-06, "loss": 0.7225, "step": 8457 }, { "epoch": 0.5954241464273143, "grad_norm": 1.749969482421875, "learning_rate": 5.19777574555914e-06, "loss": 0.6209, "step": 8458 }, { "epoch": 0.5954945441745864, "grad_norm": 3.95424222946167, "learning_rate": 5.196233493071109e-06, "loss": 0.6266, "step": 8459 }, { "epoch": 0.5955649419218585, "grad_norm": 1.7472293376922607, "learning_rate": 5.1946913343625366e-06, "loss": 0.7063, "step": 8460 }, { "epoch": 0.5956353396691306, "grad_norm": 1.813157081604004, "learning_rate": 5.1931492695135935e-06, "loss": 0.6347, "step": 8461 }, { "epoch": 0.5957057374164026, "grad_norm": 1.6858958005905151, "learning_rate": 5.191607298604458e-06, "loss": 0.6391, "step": 8462 }, { "epoch": 0.5957761351636748, "grad_norm": 2.0438168048858643, "learning_rate": 5.190065421715297e-06, "loss": 0.7187, "step": 8463 }, { "epoch": 0.5958465329109468, "grad_norm": 1.9380947351455688, "learning_rate": 5.188523638926274e-06, "loss": 0.7089, "step": 8464 }, { "epoch": 0.5959169306582189, "grad_norm": 1.6544520854949951, "learning_rate": 5.186981950317545e-06, "loss": 0.6336, "step": 8465 }, { "epoch": 0.5959873284054911, "grad_norm": 1.6327112913131714, "learning_rate": 5.1854403559692694e-06, "loss": 0.6226, "step": 8466 }, { "epoch": 0.5960577261527631, "grad_norm": 1.7212512493133545, "learning_rate": 5.183898855961591e-06, "loss": 0.5733, "step": 8467 }, { "epoch": 0.5961281239000352, "grad_norm": 1.7880879640579224, "learning_rate": 5.182357450374658e-06, "loss": 0.7616, "step": 8468 }, { "epoch": 0.5961985216473072, "grad_norm": 2.211440086364746, "learning_rate": 5.180816139288605e-06, "loss": 0.6081, "step": 8469 }, { "epoch": 0.5962689193945794, "grad_norm": 1.8194903135299683, "learning_rate": 5.179274922783571e-06, "loss": 0.656, "step": 8470 }, { "epoch": 0.5963393171418515, "grad_norm": 1.9253053665161133, "learning_rate": 5.177733800939679e-06, "loss": 0.6949, "step": 8471 }, { "epoch": 0.5964097148891235, "grad_norm": 1.5858569145202637, "learning_rate": 5.1761927738370604e-06, "loss": 0.5748, "step": 8472 }, { "epoch": 0.5964801126363957, "grad_norm": 1.9902812242507935, "learning_rate": 5.17465184155583e-06, "loss": 0.5605, "step": 8473 }, { "epoch": 0.5965505103836677, "grad_norm": 2.0020978450775146, "learning_rate": 5.173111004176102e-06, "loss": 0.7032, "step": 8474 }, { "epoch": 0.5966209081309398, "grad_norm": 1.9419745206832886, "learning_rate": 5.1715702617779915e-06, "loss": 0.7956, "step": 8475 }, { "epoch": 0.5966913058782118, "grad_norm": 1.9510911703109741, "learning_rate": 5.170029614441594e-06, "loss": 0.6076, "step": 8476 }, { "epoch": 0.596761703625484, "grad_norm": 1.9945069551467896, "learning_rate": 5.168489062247018e-06, "loss": 0.6921, "step": 8477 }, { "epoch": 0.5968321013727561, "grad_norm": 1.8520053625106812, "learning_rate": 5.166948605274353e-06, "loss": 0.6942, "step": 8478 }, { "epoch": 0.5969024991200281, "grad_norm": 2.0994372367858887, "learning_rate": 5.165408243603691e-06, "loss": 0.6709, "step": 8479 }, { "epoch": 0.5969728968673003, "grad_norm": 1.645316481590271, "learning_rate": 5.163867977315112e-06, "loss": 0.6427, "step": 8480 }, { "epoch": 0.5970432946145723, "grad_norm": 2.221527099609375, "learning_rate": 5.162327806488706e-06, "loss": 0.6555, "step": 8481 }, { "epoch": 0.5971136923618444, "grad_norm": 2.211069107055664, "learning_rate": 5.160787731204537e-06, "loss": 0.7698, "step": 8482 }, { "epoch": 0.5971840901091165, "grad_norm": 1.7305387258529663, "learning_rate": 5.159247751542682e-06, "loss": 0.6889, "step": 8483 }, { "epoch": 0.5972544878563886, "grad_norm": 1.7123372554779053, "learning_rate": 5.157707867583202e-06, "loss": 0.7088, "step": 8484 }, { "epoch": 0.5973248856036607, "grad_norm": 1.55746328830719, "learning_rate": 5.15616807940616e-06, "loss": 0.5799, "step": 8485 }, { "epoch": 0.5973952833509327, "grad_norm": 1.6479235887527466, "learning_rate": 5.154628387091606e-06, "loss": 0.5741, "step": 8486 }, { "epoch": 0.5974656810982049, "grad_norm": 1.8764299154281616, "learning_rate": 5.153088790719599e-06, "loss": 0.6456, "step": 8487 }, { "epoch": 0.597536078845477, "grad_norm": 2.1327567100524902, "learning_rate": 5.151549290370173e-06, "loss": 0.8826, "step": 8488 }, { "epoch": 0.597606476592749, "grad_norm": 1.8685656785964966, "learning_rate": 5.150009886123376e-06, "loss": 0.7579, "step": 8489 }, { "epoch": 0.5976768743400211, "grad_norm": 1.8479071855545044, "learning_rate": 5.14847057805924e-06, "loss": 0.642, "step": 8490 }, { "epoch": 0.5977472720872932, "grad_norm": 2.115830659866333, "learning_rate": 5.1469313662577965e-06, "loss": 0.6204, "step": 8491 }, { "epoch": 0.5978176698345653, "grad_norm": 1.9722508192062378, "learning_rate": 5.145392250799067e-06, "loss": 0.6289, "step": 8492 }, { "epoch": 0.5978880675818374, "grad_norm": 1.6348600387573242, "learning_rate": 5.143853231763078e-06, "loss": 0.6839, "step": 8493 }, { "epoch": 0.5979584653291095, "grad_norm": 1.7035253047943115, "learning_rate": 5.1423143092298365e-06, "loss": 0.6619, "step": 8494 }, { "epoch": 0.5980288630763816, "grad_norm": 2.2626399993896484, "learning_rate": 5.140775483279359e-06, "loss": 0.6119, "step": 8495 }, { "epoch": 0.5980992608236536, "grad_norm": 1.7520761489868164, "learning_rate": 5.139236753991646e-06, "loss": 0.7304, "step": 8496 }, { "epoch": 0.5981696585709257, "grad_norm": 1.6778563261032104, "learning_rate": 5.137698121446702e-06, "loss": 0.6409, "step": 8497 }, { "epoch": 0.5982400563181978, "grad_norm": 1.952854037284851, "learning_rate": 5.136159585724516e-06, "loss": 0.5217, "step": 8498 }, { "epoch": 0.5983104540654699, "grad_norm": 1.5444109439849854, "learning_rate": 5.134621146905085e-06, "loss": 0.6839, "step": 8499 }, { "epoch": 0.598380851812742, "grad_norm": 1.5762684345245361, "learning_rate": 5.133082805068386e-06, "loss": 0.6792, "step": 8500 }, { "epoch": 0.598451249560014, "grad_norm": 1.628889799118042, "learning_rate": 5.1315445602944056e-06, "loss": 0.6528, "step": 8501 }, { "epoch": 0.5985216473072862, "grad_norm": 2.0879697799682617, "learning_rate": 5.130006412663113e-06, "loss": 0.7015, "step": 8502 }, { "epoch": 0.5985920450545582, "grad_norm": 1.8271204233169556, "learning_rate": 5.128468362254485e-06, "loss": 0.601, "step": 8503 }, { "epoch": 0.5986624428018303, "grad_norm": 2.1266415119171143, "learning_rate": 5.126930409148476e-06, "loss": 0.6009, "step": 8504 }, { "epoch": 0.5987328405491025, "grad_norm": 1.7206013202667236, "learning_rate": 5.125392553425056e-06, "loss": 0.6591, "step": 8505 }, { "epoch": 0.5988032382963745, "grad_norm": 1.7175180912017822, "learning_rate": 5.1238547951641726e-06, "loss": 0.6439, "step": 8506 }, { "epoch": 0.5988736360436466, "grad_norm": 2.0200042724609375, "learning_rate": 5.122317134445779e-06, "loss": 0.6865, "step": 8507 }, { "epoch": 0.5989440337909187, "grad_norm": 1.5090858936309814, "learning_rate": 5.120779571349817e-06, "loss": 0.7067, "step": 8508 }, { "epoch": 0.5990144315381908, "grad_norm": 1.749621868133545, "learning_rate": 5.119242105956228e-06, "loss": 0.6408, "step": 8509 }, { "epoch": 0.5990848292854629, "grad_norm": 2.2275054454803467, "learning_rate": 5.117704738344943e-06, "loss": 0.6467, "step": 8510 }, { "epoch": 0.5991552270327349, "grad_norm": 1.6324996948242188, "learning_rate": 5.1161674685958965e-06, "loss": 0.5713, "step": 8511 }, { "epoch": 0.5992256247800071, "grad_norm": 2.0564937591552734, "learning_rate": 5.114630296789008e-06, "loss": 0.7003, "step": 8512 }, { "epoch": 0.5992960225272791, "grad_norm": 3.7268025875091553, "learning_rate": 5.113093223004198e-06, "loss": 0.5804, "step": 8513 }, { "epoch": 0.5993664202745512, "grad_norm": 1.8240187168121338, "learning_rate": 5.111556247321379e-06, "loss": 0.6736, "step": 8514 }, { "epoch": 0.5994368180218232, "grad_norm": 1.7403531074523926, "learning_rate": 5.110019369820463e-06, "loss": 0.6275, "step": 8515 }, { "epoch": 0.5995072157690954, "grad_norm": 1.7659893035888672, "learning_rate": 5.108482590581348e-06, "loss": 0.5921, "step": 8516 }, { "epoch": 0.5995776135163675, "grad_norm": 1.7897708415985107, "learning_rate": 5.106945909683941e-06, "loss": 0.6959, "step": 8517 }, { "epoch": 0.5996480112636395, "grad_norm": 1.8650621175765991, "learning_rate": 5.105409327208127e-06, "loss": 0.7648, "step": 8518 }, { "epoch": 0.5997184090109117, "grad_norm": 1.6196131706237793, "learning_rate": 5.103872843233801e-06, "loss": 0.5292, "step": 8519 }, { "epoch": 0.5997888067581837, "grad_norm": 1.8076646327972412, "learning_rate": 5.10233645784084e-06, "loss": 0.6484, "step": 8520 }, { "epoch": 0.5998592045054558, "grad_norm": 1.6266454458236694, "learning_rate": 5.100800171109128e-06, "loss": 0.7238, "step": 8521 }, { "epoch": 0.599929602252728, "grad_norm": 1.970662236213684, "learning_rate": 5.099263983118532e-06, "loss": 0.6819, "step": 8522 }, { "epoch": 0.6, "grad_norm": 1.5631868839263916, "learning_rate": 5.097727893948924e-06, "loss": 0.5997, "step": 8523 }, { "epoch": 0.6000703977472721, "grad_norm": 1.7029139995574951, "learning_rate": 5.096191903680167e-06, "loss": 0.7245, "step": 8524 }, { "epoch": 0.6001407954945441, "grad_norm": 1.736367106437683, "learning_rate": 5.094656012392114e-06, "loss": 0.6938, "step": 8525 }, { "epoch": 0.6002111932418163, "grad_norm": 2.0227532386779785, "learning_rate": 5.093120220164626e-06, "loss": 0.8128, "step": 8526 }, { "epoch": 0.6002815909890884, "grad_norm": 1.8936967849731445, "learning_rate": 5.091584527077539e-06, "loss": 0.6401, "step": 8527 }, { "epoch": 0.6003519887363604, "grad_norm": 1.7083042860031128, "learning_rate": 5.0900489332107044e-06, "loss": 0.7659, "step": 8528 }, { "epoch": 0.6004223864836326, "grad_norm": 2.085153579711914, "learning_rate": 5.088513438643955e-06, "loss": 0.6942, "step": 8529 }, { "epoch": 0.6004927842309046, "grad_norm": 1.618595838546753, "learning_rate": 5.086978043457122e-06, "loss": 0.711, "step": 8530 }, { "epoch": 0.6005631819781767, "grad_norm": 1.9468778371810913, "learning_rate": 5.085442747730032e-06, "loss": 0.7405, "step": 8531 }, { "epoch": 0.6006335797254488, "grad_norm": 1.9404460191726685, "learning_rate": 5.083907551542512e-06, "loss": 0.7658, "step": 8532 }, { "epoch": 0.6007039774727209, "grad_norm": 1.663267731666565, "learning_rate": 5.082372454974368e-06, "loss": 0.707, "step": 8533 }, { "epoch": 0.600774375219993, "grad_norm": 1.893189549446106, "learning_rate": 5.080837458105421e-06, "loss": 0.6555, "step": 8534 }, { "epoch": 0.600844772967265, "grad_norm": 1.995410680770874, "learning_rate": 5.07930256101547e-06, "loss": 0.7091, "step": 8535 }, { "epoch": 0.6009151707145372, "grad_norm": 1.783005714416504, "learning_rate": 5.077767763784321e-06, "loss": 0.6776, "step": 8536 }, { "epoch": 0.6009855684618092, "grad_norm": 1.8897106647491455, "learning_rate": 5.076233066491764e-06, "loss": 0.7505, "step": 8537 }, { "epoch": 0.6010559662090813, "grad_norm": 1.9082528352737427, "learning_rate": 5.074698469217595e-06, "loss": 0.7357, "step": 8538 }, { "epoch": 0.6011263639563534, "grad_norm": 1.789666771888733, "learning_rate": 5.073163972041593e-06, "loss": 0.691, "step": 8539 }, { "epoch": 0.6011967617036255, "grad_norm": 2.198082685470581, "learning_rate": 5.071629575043544e-06, "loss": 0.6756, "step": 8540 }, { "epoch": 0.6012671594508976, "grad_norm": 1.8661085367202759, "learning_rate": 5.070095278303218e-06, "loss": 0.6064, "step": 8541 }, { "epoch": 0.6013375571981696, "grad_norm": 2.3743979930877686, "learning_rate": 5.068561081900388e-06, "loss": 0.6505, "step": 8542 }, { "epoch": 0.6014079549454417, "grad_norm": 1.5505473613739014, "learning_rate": 5.067026985914814e-06, "loss": 0.7101, "step": 8543 }, { "epoch": 0.6014783526927139, "grad_norm": 2.0057239532470703, "learning_rate": 5.065492990426262e-06, "loss": 0.6564, "step": 8544 }, { "epoch": 0.6015487504399859, "grad_norm": 1.8085163831710815, "learning_rate": 5.063959095514476e-06, "loss": 0.7303, "step": 8545 }, { "epoch": 0.601619148187258, "grad_norm": 1.828218936920166, "learning_rate": 5.062425301259214e-06, "loss": 0.582, "step": 8546 }, { "epoch": 0.6016895459345301, "grad_norm": 2.087665319442749, "learning_rate": 5.0608916077402135e-06, "loss": 0.6019, "step": 8547 }, { "epoch": 0.6017599436818022, "grad_norm": 1.8910046815872192, "learning_rate": 5.059358015037216e-06, "loss": 0.684, "step": 8548 }, { "epoch": 0.6018303414290743, "grad_norm": 1.5470184087753296, "learning_rate": 5.057824523229949e-06, "loss": 0.6448, "step": 8549 }, { "epoch": 0.6019007391763463, "grad_norm": 1.802042841911316, "learning_rate": 5.056291132398149e-06, "loss": 0.7019, "step": 8550 }, { "epoch": 0.6019711369236185, "grad_norm": 1.485946774482727, "learning_rate": 5.0547578426215295e-06, "loss": 0.6342, "step": 8551 }, { "epoch": 0.6020415346708905, "grad_norm": 2.2183377742767334, "learning_rate": 5.053224653979814e-06, "loss": 0.5428, "step": 8552 }, { "epoch": 0.6021119324181626, "grad_norm": 2.3267033100128174, "learning_rate": 5.0516915665527106e-06, "loss": 0.7075, "step": 8553 }, { "epoch": 0.6021823301654347, "grad_norm": 2.0063416957855225, "learning_rate": 5.050158580419929e-06, "loss": 0.6759, "step": 8554 }, { "epoch": 0.6022527279127068, "grad_norm": 1.9246195554733276, "learning_rate": 5.048625695661164e-06, "loss": 0.8149, "step": 8555 }, { "epoch": 0.6023231256599789, "grad_norm": 1.7804750204086304, "learning_rate": 5.0470929123561215e-06, "loss": 0.7818, "step": 8556 }, { "epoch": 0.6023935234072509, "grad_norm": 2.0596866607666016, "learning_rate": 5.0455602305844845e-06, "loss": 0.663, "step": 8557 }, { "epoch": 0.6024639211545231, "grad_norm": 1.7001620531082153, "learning_rate": 5.044027650425942e-06, "loss": 0.6498, "step": 8558 }, { "epoch": 0.6025343189017951, "grad_norm": 1.6457452774047852, "learning_rate": 5.042495171960174e-06, "loss": 0.6394, "step": 8559 }, { "epoch": 0.6026047166490672, "grad_norm": 1.707058310508728, "learning_rate": 5.040962795266855e-06, "loss": 0.6914, "step": 8560 }, { "epoch": 0.6026751143963394, "grad_norm": 1.6837317943572998, "learning_rate": 5.039430520425651e-06, "loss": 0.589, "step": 8561 }, { "epoch": 0.6027455121436114, "grad_norm": 1.6144492626190186, "learning_rate": 5.037898347516233e-06, "loss": 0.6763, "step": 8562 }, { "epoch": 0.6028159098908835, "grad_norm": 1.8078725337982178, "learning_rate": 5.036366276618256e-06, "loss": 0.7543, "step": 8563 }, { "epoch": 0.6028863076381555, "grad_norm": 1.763501763343811, "learning_rate": 5.034834307811376e-06, "loss": 0.7227, "step": 8564 }, { "epoch": 0.6029567053854277, "grad_norm": 1.807627558708191, "learning_rate": 5.033302441175239e-06, "loss": 0.6722, "step": 8565 }, { "epoch": 0.6030271031326998, "grad_norm": 1.5825691223144531, "learning_rate": 5.0317706767894895e-06, "loss": 0.6777, "step": 8566 }, { "epoch": 0.6030975008799718, "grad_norm": 1.8179593086242676, "learning_rate": 5.030239014733763e-06, "loss": 0.717, "step": 8567 }, { "epoch": 0.603167898627244, "grad_norm": 1.7985919713974, "learning_rate": 5.028707455087697e-06, "loss": 0.58, "step": 8568 }, { "epoch": 0.603238296374516, "grad_norm": 2.086974620819092, "learning_rate": 5.027175997930915e-06, "loss": 0.704, "step": 8569 }, { "epoch": 0.6033086941217881, "grad_norm": 1.8145968914031982, "learning_rate": 5.025644643343039e-06, "loss": 0.7088, "step": 8570 }, { "epoch": 0.6033790918690601, "grad_norm": 1.7276487350463867, "learning_rate": 5.0241133914036865e-06, "loss": 0.6269, "step": 8571 }, { "epoch": 0.6034494896163323, "grad_norm": 1.9103460311889648, "learning_rate": 5.0225822421924685e-06, "loss": 0.6339, "step": 8572 }, { "epoch": 0.6035198873636044, "grad_norm": 2.020648956298828, "learning_rate": 5.021051195788988e-06, "loss": 0.7199, "step": 8573 }, { "epoch": 0.6035902851108764, "grad_norm": 2.1175076961517334, "learning_rate": 5.01952025227285e-06, "loss": 0.5114, "step": 8574 }, { "epoch": 0.6036606828581486, "grad_norm": 1.6328963041305542, "learning_rate": 5.017989411723648e-06, "loss": 0.5377, "step": 8575 }, { "epoch": 0.6037310806054206, "grad_norm": 1.746058702468872, "learning_rate": 5.0164586742209705e-06, "loss": 0.5768, "step": 8576 }, { "epoch": 0.6038014783526927, "grad_norm": 1.9382121562957764, "learning_rate": 5.014928039844405e-06, "loss": 0.7536, "step": 8577 }, { "epoch": 0.6038718760999648, "grad_norm": 1.689061164855957, "learning_rate": 5.013397508673526e-06, "loss": 0.6299, "step": 8578 }, { "epoch": 0.6039422738472369, "grad_norm": 2.1944515705108643, "learning_rate": 5.011867080787912e-06, "loss": 0.5826, "step": 8579 }, { "epoch": 0.604012671594509, "grad_norm": 1.8450775146484375, "learning_rate": 5.010336756267126e-06, "loss": 0.7151, "step": 8580 }, { "epoch": 0.604083069341781, "grad_norm": 1.6049631834030151, "learning_rate": 5.008806535190737e-06, "loss": 0.626, "step": 8581 }, { "epoch": 0.6041534670890532, "grad_norm": 1.8794314861297607, "learning_rate": 5.007276417638297e-06, "loss": 0.6628, "step": 8582 }, { "epoch": 0.6042238648363253, "grad_norm": 1.6767003536224365, "learning_rate": 5.005746403689365e-06, "loss": 0.6897, "step": 8583 }, { "epoch": 0.6042942625835973, "grad_norm": 1.8321048021316528, "learning_rate": 5.004216493423479e-06, "loss": 0.6293, "step": 8584 }, { "epoch": 0.6043646603308694, "grad_norm": 1.9451040029525757, "learning_rate": 5.0026866869201875e-06, "loss": 0.6728, "step": 8585 }, { "epoch": 0.6044350580781415, "grad_norm": 1.7296987771987915, "learning_rate": 5.001156984259023e-06, "loss": 0.6348, "step": 8586 }, { "epoch": 0.6045054558254136, "grad_norm": 1.8339595794677734, "learning_rate": 4.9996273855195184e-06, "loss": 0.6729, "step": 8587 }, { "epoch": 0.6045758535726857, "grad_norm": 1.8075182437896729, "learning_rate": 4.998097890781195e-06, "loss": 0.7091, "step": 8588 }, { "epoch": 0.6046462513199578, "grad_norm": 1.6169601678848267, "learning_rate": 4.99656850012358e-06, "loss": 0.8479, "step": 8589 }, { "epoch": 0.6047166490672299, "grad_norm": 1.6145334243774414, "learning_rate": 4.995039213626177e-06, "loss": 0.6904, "step": 8590 }, { "epoch": 0.6047870468145019, "grad_norm": 1.8232134580612183, "learning_rate": 4.993510031368506e-06, "loss": 0.6731, "step": 8591 }, { "epoch": 0.604857444561774, "grad_norm": 1.6872954368591309, "learning_rate": 4.991980953430064e-06, "loss": 0.6136, "step": 8592 }, { "epoch": 0.6049278423090461, "grad_norm": 2.0798399448394775, "learning_rate": 4.9904519798903505e-06, "loss": 0.6573, "step": 8593 }, { "epoch": 0.6049982400563182, "grad_norm": 1.7522470951080322, "learning_rate": 4.988923110828857e-06, "loss": 0.6002, "step": 8594 }, { "epoch": 0.6050686378035903, "grad_norm": 1.7747365236282349, "learning_rate": 4.987394346325076e-06, "loss": 0.7461, "step": 8595 }, { "epoch": 0.6051390355508623, "grad_norm": 2.0236268043518066, "learning_rate": 4.985865686458483e-06, "loss": 0.8032, "step": 8596 }, { "epoch": 0.6052094332981345, "grad_norm": 1.721683144569397, "learning_rate": 4.984337131308558e-06, "loss": 0.6456, "step": 8597 }, { "epoch": 0.6052798310454065, "grad_norm": 1.8686591386795044, "learning_rate": 4.9828086809547706e-06, "loss": 0.7465, "step": 8598 }, { "epoch": 0.6053502287926786, "grad_norm": 1.7068930864334106, "learning_rate": 4.981280335476588e-06, "loss": 0.6524, "step": 8599 }, { "epoch": 0.6054206265399508, "grad_norm": 1.6364012956619263, "learning_rate": 4.979752094953465e-06, "loss": 0.6642, "step": 8600 }, { "epoch": 0.6054910242872228, "grad_norm": 1.750077486038208, "learning_rate": 4.978223959464866e-06, "loss": 0.6503, "step": 8601 }, { "epoch": 0.6055614220344949, "grad_norm": 1.8953856229782104, "learning_rate": 4.97669592909023e-06, "loss": 0.6158, "step": 8602 }, { "epoch": 0.605631819781767, "grad_norm": 1.84130859375, "learning_rate": 4.975168003909008e-06, "loss": 0.6265, "step": 8603 }, { "epoch": 0.6057022175290391, "grad_norm": 2.011159896850586, "learning_rate": 4.973640184000633e-06, "loss": 0.6502, "step": 8604 }, { "epoch": 0.6057726152763112, "grad_norm": 1.7962387800216675, "learning_rate": 4.972112469444542e-06, "loss": 0.6226, "step": 8605 }, { "epoch": 0.6058430130235832, "grad_norm": 1.620439887046814, "learning_rate": 4.970584860320157e-06, "loss": 0.5859, "step": 8606 }, { "epoch": 0.6059134107708554, "grad_norm": 2.069469451904297, "learning_rate": 4.969057356706909e-06, "loss": 0.6181, "step": 8607 }, { "epoch": 0.6059838085181274, "grad_norm": 2.1689705848693848, "learning_rate": 4.967529958684201e-06, "loss": 0.7674, "step": 8608 }, { "epoch": 0.6060542062653995, "grad_norm": 1.623953938484192, "learning_rate": 4.966002666331456e-06, "loss": 0.6677, "step": 8609 }, { "epoch": 0.6061246040126715, "grad_norm": 1.667476773262024, "learning_rate": 4.964475479728072e-06, "loss": 0.6912, "step": 8610 }, { "epoch": 0.6061950017599437, "grad_norm": 2.1617777347564697, "learning_rate": 4.962948398953455e-06, "loss": 0.7104, "step": 8611 }, { "epoch": 0.6062653995072158, "grad_norm": 2.39357590675354, "learning_rate": 4.96142142408699e-06, "loss": 0.639, "step": 8612 }, { "epoch": 0.6063357972544878, "grad_norm": 1.6362249851226807, "learning_rate": 4.959894555208076e-06, "loss": 0.6794, "step": 8613 }, { "epoch": 0.60640619500176, "grad_norm": 1.9354060888290405, "learning_rate": 4.95836779239609e-06, "loss": 0.6587, "step": 8614 }, { "epoch": 0.606476592749032, "grad_norm": 1.7982354164123535, "learning_rate": 4.956841135730412e-06, "loss": 0.72, "step": 8615 }, { "epoch": 0.6065469904963041, "grad_norm": 2.1061108112335205, "learning_rate": 4.955314585290413e-06, "loss": 0.7104, "step": 8616 }, { "epoch": 0.6066173882435762, "grad_norm": 1.7167054414749146, "learning_rate": 4.953788141155462e-06, "loss": 0.6446, "step": 8617 }, { "epoch": 0.6066877859908483, "grad_norm": 1.6067792177200317, "learning_rate": 4.952261803404916e-06, "loss": 0.6787, "step": 8618 }, { "epoch": 0.6067581837381204, "grad_norm": 1.7047390937805176, "learning_rate": 4.9507355721181354e-06, "loss": 0.7202, "step": 8619 }, { "epoch": 0.6068285814853924, "grad_norm": 1.7207175493240356, "learning_rate": 4.949209447374467e-06, "loss": 0.6641, "step": 8620 }, { "epoch": 0.6068989792326646, "grad_norm": 2.1717405319213867, "learning_rate": 4.94768342925326e-06, "loss": 0.7327, "step": 8621 }, { "epoch": 0.6069693769799367, "grad_norm": 2.1958510875701904, "learning_rate": 4.946157517833847e-06, "loss": 0.6688, "step": 8622 }, { "epoch": 0.6070397747272087, "grad_norm": 1.7416307926177979, "learning_rate": 4.9446317131955666e-06, "loss": 0.6456, "step": 8623 }, { "epoch": 0.6071101724744808, "grad_norm": 1.721818208694458, "learning_rate": 4.943106015417743e-06, "loss": 0.581, "step": 8624 }, { "epoch": 0.6071805702217529, "grad_norm": 1.853506326675415, "learning_rate": 4.9415804245797e-06, "loss": 0.7294, "step": 8625 }, { "epoch": 0.607250967969025, "grad_norm": 1.691701054573059, "learning_rate": 4.9400549407607575e-06, "loss": 0.6592, "step": 8626 }, { "epoch": 0.607321365716297, "grad_norm": 1.6401640176773071, "learning_rate": 4.9385295640402225e-06, "loss": 0.6847, "step": 8627 }, { "epoch": 0.6073917634635692, "grad_norm": 1.9085845947265625, "learning_rate": 4.937004294497404e-06, "loss": 0.6931, "step": 8628 }, { "epoch": 0.6074621612108413, "grad_norm": 3.475663185119629, "learning_rate": 4.935479132211597e-06, "loss": 0.741, "step": 8629 }, { "epoch": 0.6075325589581133, "grad_norm": 2.1198320388793945, "learning_rate": 4.933954077262104e-06, "loss": 0.6784, "step": 8630 }, { "epoch": 0.6076029567053854, "grad_norm": 1.8417434692382812, "learning_rate": 4.932429129728207e-06, "loss": 0.5745, "step": 8631 }, { "epoch": 0.6076733544526575, "grad_norm": 1.7354316711425781, "learning_rate": 4.930904289689195e-06, "loss": 0.672, "step": 8632 }, { "epoch": 0.6077437521999296, "grad_norm": 1.819901943206787, "learning_rate": 4.929379557224338e-06, "loss": 0.675, "step": 8633 }, { "epoch": 0.6078141499472017, "grad_norm": 2.0929665565490723, "learning_rate": 4.927854932412919e-06, "loss": 0.7386, "step": 8634 }, { "epoch": 0.6078845476944738, "grad_norm": 1.7057753801345825, "learning_rate": 4.926330415334193e-06, "loss": 0.648, "step": 8635 }, { "epoch": 0.6079549454417459, "grad_norm": 1.669165849685669, "learning_rate": 4.92480600606743e-06, "loss": 0.6149, "step": 8636 }, { "epoch": 0.6080253431890179, "grad_norm": 1.5323164463043213, "learning_rate": 4.9232817046918816e-06, "loss": 0.6277, "step": 8637 }, { "epoch": 0.60809574093629, "grad_norm": 2.0705578327178955, "learning_rate": 4.921757511286799e-06, "loss": 0.7206, "step": 8638 }, { "epoch": 0.6081661386835622, "grad_norm": 1.495934009552002, "learning_rate": 4.920233425931422e-06, "loss": 0.5382, "step": 8639 }, { "epoch": 0.6082365364308342, "grad_norm": 1.622482419013977, "learning_rate": 4.918709448704998e-06, "loss": 0.7209, "step": 8640 }, { "epoch": 0.6083069341781063, "grad_norm": 1.783653974533081, "learning_rate": 4.91718557968675e-06, "loss": 0.6364, "step": 8641 }, { "epoch": 0.6083773319253784, "grad_norm": 1.5827827453613281, "learning_rate": 4.9156618189559135e-06, "loss": 0.6842, "step": 8642 }, { "epoch": 0.6084477296726505, "grad_norm": 1.6192960739135742, "learning_rate": 4.914138166591705e-06, "loss": 0.654, "step": 8643 }, { "epoch": 0.6085181274199226, "grad_norm": 1.6428182125091553, "learning_rate": 4.912614622673343e-06, "loss": 0.6932, "step": 8644 }, { "epoch": 0.6085885251671946, "grad_norm": 1.7241443395614624, "learning_rate": 4.911091187280035e-06, "loss": 0.5787, "step": 8645 }, { "epoch": 0.6086589229144668, "grad_norm": 1.3127484321594238, "learning_rate": 4.909567860490992e-06, "loss": 0.7886, "step": 8646 }, { "epoch": 0.6087293206617388, "grad_norm": 2.382514715194702, "learning_rate": 4.908044642385405e-06, "loss": 0.7425, "step": 8647 }, { "epoch": 0.6087997184090109, "grad_norm": 1.8432049751281738, "learning_rate": 4.906521533042474e-06, "loss": 0.6922, "step": 8648 }, { "epoch": 0.608870116156283, "grad_norm": 1.8328766822814941, "learning_rate": 4.9049985325413835e-06, "loss": 0.6204, "step": 8649 }, { "epoch": 0.6089405139035551, "grad_norm": 1.701912522315979, "learning_rate": 4.903475640961318e-06, "loss": 0.7689, "step": 8650 }, { "epoch": 0.6090109116508272, "grad_norm": 1.9696065187454224, "learning_rate": 4.90195285838145e-06, "loss": 0.6389, "step": 8651 }, { "epoch": 0.6090813093980992, "grad_norm": 1.622334599494934, "learning_rate": 4.900430184880958e-06, "loss": 0.6418, "step": 8652 }, { "epoch": 0.6091517071453714, "grad_norm": 1.9507313966751099, "learning_rate": 4.898907620538997e-06, "loss": 0.6345, "step": 8653 }, { "epoch": 0.6092221048926434, "grad_norm": 1.728570580482483, "learning_rate": 4.897385165434734e-06, "loss": 0.6532, "step": 8654 }, { "epoch": 0.6092925026399155, "grad_norm": 1.7806295156478882, "learning_rate": 4.8958628196473194e-06, "loss": 0.6795, "step": 8655 }, { "epoch": 0.6093629003871877, "grad_norm": 1.9620670080184937, "learning_rate": 4.894340583255904e-06, "loss": 0.6742, "step": 8656 }, { "epoch": 0.6094332981344597, "grad_norm": 1.563077449798584, "learning_rate": 4.892818456339626e-06, "loss": 0.7534, "step": 8657 }, { "epoch": 0.6095036958817318, "grad_norm": 2.08535099029541, "learning_rate": 4.891296438977629e-06, "loss": 0.6165, "step": 8658 }, { "epoch": 0.6095740936290038, "grad_norm": 1.7356245517730713, "learning_rate": 4.8897745312490355e-06, "loss": 0.6216, "step": 8659 }, { "epoch": 0.609644491376276, "grad_norm": 1.697792410850525, "learning_rate": 4.888252733232979e-06, "loss": 0.5639, "step": 8660 }, { "epoch": 0.6097148891235481, "grad_norm": 1.6127930879592896, "learning_rate": 4.886731045008573e-06, "loss": 0.6282, "step": 8661 }, { "epoch": 0.6097852868708201, "grad_norm": 1.7413667440414429, "learning_rate": 4.885209466654936e-06, "loss": 0.8092, "step": 8662 }, { "epoch": 0.6098556846180923, "grad_norm": 1.8518530130386353, "learning_rate": 4.88368799825117e-06, "loss": 0.7137, "step": 8663 }, { "epoch": 0.6099260823653643, "grad_norm": 2.069244146347046, "learning_rate": 4.8821666398763854e-06, "loss": 0.7668, "step": 8664 }, { "epoch": 0.6099964801126364, "grad_norm": 2.053692102432251, "learning_rate": 4.8806453916096735e-06, "loss": 0.6498, "step": 8665 }, { "epoch": 0.6100668778599084, "grad_norm": 1.6638824939727783, "learning_rate": 4.8791242535301285e-06, "loss": 0.5847, "step": 8666 }, { "epoch": 0.6101372756071806, "grad_norm": 2.1653056144714355, "learning_rate": 4.877603225716833e-06, "loss": 0.6058, "step": 8667 }, { "epoch": 0.6102076733544527, "grad_norm": 1.6418386697769165, "learning_rate": 4.87608230824887e-06, "loss": 0.7365, "step": 8668 }, { "epoch": 0.6102780711017247, "grad_norm": 1.8699485063552856, "learning_rate": 4.874561501205307e-06, "loss": 0.7478, "step": 8669 }, { "epoch": 0.6103484688489969, "grad_norm": 1.6281390190124512, "learning_rate": 4.873040804665221e-06, "loss": 0.5714, "step": 8670 }, { "epoch": 0.6104188665962689, "grad_norm": 1.4529949426651, "learning_rate": 4.871520218707667e-06, "loss": 0.6216, "step": 8671 }, { "epoch": 0.610489264343541, "grad_norm": 1.5686602592468262, "learning_rate": 4.869999743411707e-06, "loss": 0.6653, "step": 8672 }, { "epoch": 0.6105596620908131, "grad_norm": 2.0689964294433594, "learning_rate": 4.868479378856387e-06, "loss": 0.5769, "step": 8673 }, { "epoch": 0.6106300598380852, "grad_norm": 2.269317626953125, "learning_rate": 4.866959125120755e-06, "loss": 0.7247, "step": 8674 }, { "epoch": 0.6107004575853573, "grad_norm": 1.6893086433410645, "learning_rate": 4.86543898228385e-06, "loss": 0.6441, "step": 8675 }, { "epoch": 0.6107708553326293, "grad_norm": 1.7410163879394531, "learning_rate": 4.863918950424706e-06, "loss": 0.6169, "step": 8676 }, { "epoch": 0.6108412530799014, "grad_norm": 1.7204935550689697, "learning_rate": 4.862399029622351e-06, "loss": 0.6326, "step": 8677 }, { "epoch": 0.6109116508271736, "grad_norm": 2.174363374710083, "learning_rate": 4.860879219955805e-06, "loss": 0.6507, "step": 8678 }, { "epoch": 0.6109820485744456, "grad_norm": 2.1079442501068115, "learning_rate": 4.859359521504087e-06, "loss": 0.6801, "step": 8679 }, { "epoch": 0.6110524463217177, "grad_norm": 1.7487503290176392, "learning_rate": 4.857839934346204e-06, "loss": 0.6493, "step": 8680 }, { "epoch": 0.6111228440689898, "grad_norm": 1.476908802986145, "learning_rate": 4.856320458561164e-06, "loss": 0.6192, "step": 8681 }, { "epoch": 0.6111932418162619, "grad_norm": 1.7957619428634644, "learning_rate": 4.854801094227965e-06, "loss": 0.5708, "step": 8682 }, { "epoch": 0.6112636395635339, "grad_norm": 1.8126438856124878, "learning_rate": 4.853281841425601e-06, "loss": 0.6581, "step": 8683 }, { "epoch": 0.611334037310806, "grad_norm": 1.8898149728775024, "learning_rate": 4.851762700233057e-06, "loss": 0.6331, "step": 8684 }, { "epoch": 0.6114044350580782, "grad_norm": 1.6255416870117188, "learning_rate": 4.8502436707293165e-06, "loss": 0.6618, "step": 8685 }, { "epoch": 0.6114748328053502, "grad_norm": 1.7979999780654907, "learning_rate": 4.848724752993353e-06, "loss": 0.6206, "step": 8686 }, { "epoch": 0.6115452305526223, "grad_norm": 1.6913442611694336, "learning_rate": 4.847205947104139e-06, "loss": 0.5264, "step": 8687 }, { "epoch": 0.6116156282998944, "grad_norm": 1.826324224472046, "learning_rate": 4.8456872531406365e-06, "loss": 0.7159, "step": 8688 }, { "epoch": 0.6116860260471665, "grad_norm": 1.973144769668579, "learning_rate": 4.844168671181807e-06, "loss": 0.7312, "step": 8689 }, { "epoch": 0.6117564237944386, "grad_norm": 1.4526677131652832, "learning_rate": 4.8426502013065975e-06, "loss": 0.7199, "step": 8690 }, { "epoch": 0.6118268215417106, "grad_norm": 1.944908857345581, "learning_rate": 4.841131843593961e-06, "loss": 0.7364, "step": 8691 }, { "epoch": 0.6118972192889828, "grad_norm": 1.7158997058868408, "learning_rate": 4.839613598122832e-06, "loss": 0.6059, "step": 8692 }, { "epoch": 0.6119676170362548, "grad_norm": 1.6234071254730225, "learning_rate": 4.838095464972152e-06, "loss": 0.8222, "step": 8693 }, { "epoch": 0.6120380147835269, "grad_norm": 1.7620798349380493, "learning_rate": 4.836577444220845e-06, "loss": 0.7459, "step": 8694 }, { "epoch": 0.6121084125307991, "grad_norm": 1.5444415807724, "learning_rate": 4.8350595359478374e-06, "loss": 0.5691, "step": 8695 }, { "epoch": 0.6121788102780711, "grad_norm": 1.7618365287780762, "learning_rate": 4.833541740232041e-06, "loss": 0.7172, "step": 8696 }, { "epoch": 0.6122492080253432, "grad_norm": 1.566286325454712, "learning_rate": 4.832024057152378e-06, "loss": 0.6387, "step": 8697 }, { "epoch": 0.6123196057726152, "grad_norm": 1.959796667098999, "learning_rate": 4.830506486787742e-06, "loss": 0.6381, "step": 8698 }, { "epoch": 0.6123900035198874, "grad_norm": 1.8452783823013306, "learning_rate": 4.828989029217042e-06, "loss": 0.7665, "step": 8699 }, { "epoch": 0.6124604012671595, "grad_norm": 1.5699717998504639, "learning_rate": 4.827471684519167e-06, "loss": 0.7198, "step": 8700 }, { "epoch": 0.6125307990144315, "grad_norm": 1.4632755517959595, "learning_rate": 4.825954452773008e-06, "loss": 0.6875, "step": 8701 }, { "epoch": 0.6126011967617037, "grad_norm": 1.863256573677063, "learning_rate": 4.824437334057443e-06, "loss": 0.6288, "step": 8702 }, { "epoch": 0.6126715945089757, "grad_norm": 1.6889747381210327, "learning_rate": 4.822920328451355e-06, "loss": 0.5735, "step": 8703 }, { "epoch": 0.6127419922562478, "grad_norm": 1.6585556268692017, "learning_rate": 4.821403436033608e-06, "loss": 0.7663, "step": 8704 }, { "epoch": 0.6128123900035198, "grad_norm": 1.7450321912765503, "learning_rate": 4.819886656883071e-06, "loss": 0.6694, "step": 8705 }, { "epoch": 0.612882787750792, "grad_norm": 1.5075526237487793, "learning_rate": 4.8183699910786e-06, "loss": 0.5755, "step": 8706 }, { "epoch": 0.6129531854980641, "grad_norm": 1.762325644493103, "learning_rate": 4.816853438699051e-06, "loss": 0.7159, "step": 8707 }, { "epoch": 0.6130235832453361, "grad_norm": 1.9194058179855347, "learning_rate": 4.815336999823265e-06, "loss": 0.6351, "step": 8708 }, { "epoch": 0.6130939809926083, "grad_norm": 1.6477009057998657, "learning_rate": 4.813820674530092e-06, "loss": 0.6652, "step": 8709 }, { "epoch": 0.6131643787398803, "grad_norm": 1.8129243850708008, "learning_rate": 4.812304462898357e-06, "loss": 0.6451, "step": 8710 }, { "epoch": 0.6132347764871524, "grad_norm": 1.919729471206665, "learning_rate": 4.810788365006897e-06, "loss": 0.6682, "step": 8711 }, { "epoch": 0.6133051742344245, "grad_norm": 1.828072428703308, "learning_rate": 4.809272380934532e-06, "loss": 0.712, "step": 8712 }, { "epoch": 0.6133755719816966, "grad_norm": 1.899174451828003, "learning_rate": 4.807756510760082e-06, "loss": 0.6919, "step": 8713 }, { "epoch": 0.6134459697289687, "grad_norm": 1.8846534490585327, "learning_rate": 4.806240754562353e-06, "loss": 0.7326, "step": 8714 }, { "epoch": 0.6135163674762407, "grad_norm": 2.165464401245117, "learning_rate": 4.804725112420157e-06, "loss": 0.6908, "step": 8715 }, { "epoch": 0.6135867652235129, "grad_norm": 1.6720211505889893, "learning_rate": 4.803209584412287e-06, "loss": 0.6773, "step": 8716 }, { "epoch": 0.613657162970785, "grad_norm": 1.797785758972168, "learning_rate": 4.8016941706175446e-06, "loss": 0.6336, "step": 8717 }, { "epoch": 0.613727560718057, "grad_norm": 1.8831453323364258, "learning_rate": 4.800178871114711e-06, "loss": 0.7164, "step": 8718 }, { "epoch": 0.6137979584653291, "grad_norm": 1.866011619567871, "learning_rate": 4.798663685982571e-06, "loss": 0.816, "step": 8719 }, { "epoch": 0.6138683562126012, "grad_norm": 1.5246503353118896, "learning_rate": 4.797148615299896e-06, "loss": 0.6465, "step": 8720 }, { "epoch": 0.6139387539598733, "grad_norm": 2.540753126144409, "learning_rate": 4.795633659145463e-06, "loss": 0.7262, "step": 8721 }, { "epoch": 0.6140091517071453, "grad_norm": 1.8100754022598267, "learning_rate": 4.794118817598031e-06, "loss": 0.5873, "step": 8722 }, { "epoch": 0.6140795494544175, "grad_norm": 1.6505606174468994, "learning_rate": 4.792604090736362e-06, "loss": 0.6367, "step": 8723 }, { "epoch": 0.6141499472016896, "grad_norm": 2.0991785526275635, "learning_rate": 4.791089478639203e-06, "loss": 0.7336, "step": 8724 }, { "epoch": 0.6142203449489616, "grad_norm": 1.707631230354309, "learning_rate": 4.7895749813853005e-06, "loss": 0.5436, "step": 8725 }, { "epoch": 0.6142907426962337, "grad_norm": 1.7602643966674805, "learning_rate": 4.788060599053401e-06, "loss": 0.8078, "step": 8726 }, { "epoch": 0.6143611404435058, "grad_norm": 1.7059648036956787, "learning_rate": 4.786546331722232e-06, "loss": 0.6731, "step": 8727 }, { "epoch": 0.6144315381907779, "grad_norm": 1.8101017475128174, "learning_rate": 4.785032179470526e-06, "loss": 0.6395, "step": 8728 }, { "epoch": 0.61450193593805, "grad_norm": 1.5502848625183105, "learning_rate": 4.783518142377e-06, "loss": 0.6776, "step": 8729 }, { "epoch": 0.614572333685322, "grad_norm": 2.073719024658203, "learning_rate": 4.782004220520375e-06, "loss": 0.5961, "step": 8730 }, { "epoch": 0.6146427314325942, "grad_norm": 2.126835823059082, "learning_rate": 4.780490413979357e-06, "loss": 0.7126, "step": 8731 }, { "epoch": 0.6147131291798662, "grad_norm": 2.001443386077881, "learning_rate": 4.778976722832655e-06, "loss": 0.7421, "step": 8732 }, { "epoch": 0.6147835269271383, "grad_norm": 2.221947431564331, "learning_rate": 4.777463147158964e-06, "loss": 0.7111, "step": 8733 }, { "epoch": 0.6148539246744105, "grad_norm": 1.8115845918655396, "learning_rate": 4.775949687036977e-06, "loss": 0.6194, "step": 8734 }, { "epoch": 0.6149243224216825, "grad_norm": 1.597571849822998, "learning_rate": 4.774436342545379e-06, "loss": 0.7143, "step": 8735 }, { "epoch": 0.6149947201689546, "grad_norm": 1.7431799173355103, "learning_rate": 4.772923113762853e-06, "loss": 0.6162, "step": 8736 }, { "epoch": 0.6150651179162266, "grad_norm": 1.9278995990753174, "learning_rate": 4.771410000768069e-06, "loss": 0.7308, "step": 8737 }, { "epoch": 0.6151355156634988, "grad_norm": 2.2686023712158203, "learning_rate": 4.769897003639699e-06, "loss": 0.7439, "step": 8738 }, { "epoch": 0.6152059134107709, "grad_norm": 1.7720314264297485, "learning_rate": 4.768384122456402e-06, "loss": 0.6859, "step": 8739 }, { "epoch": 0.6152763111580429, "grad_norm": 1.8496757745742798, "learning_rate": 4.766871357296838e-06, "loss": 0.7017, "step": 8740 }, { "epoch": 0.6153467089053151, "grad_norm": 2.4582011699676514, "learning_rate": 4.765358708239651e-06, "loss": 0.6862, "step": 8741 }, { "epoch": 0.6154171066525871, "grad_norm": 1.6956257820129395, "learning_rate": 4.7638461753634935e-06, "loss": 0.7186, "step": 8742 }, { "epoch": 0.6154875043998592, "grad_norm": 2.125033140182495, "learning_rate": 4.762333758746993e-06, "loss": 0.6779, "step": 8743 }, { "epoch": 0.6155579021471312, "grad_norm": 1.6986947059631348, "learning_rate": 4.760821458468791e-06, "loss": 0.7144, "step": 8744 }, { "epoch": 0.6156282998944034, "grad_norm": 1.873706340789795, "learning_rate": 4.759309274607507e-06, "loss": 0.631, "step": 8745 }, { "epoch": 0.6156986976416755, "grad_norm": 1.8497536182403564, "learning_rate": 4.757797207241765e-06, "loss": 0.6852, "step": 8746 }, { "epoch": 0.6157690953889475, "grad_norm": 1.744938850402832, "learning_rate": 4.756285256450174e-06, "loss": 0.6537, "step": 8747 }, { "epoch": 0.6158394931362197, "grad_norm": 1.8188581466674805, "learning_rate": 4.754773422311349e-06, "loss": 0.7365, "step": 8748 }, { "epoch": 0.6159098908834917, "grad_norm": 1.7040146589279175, "learning_rate": 4.753261704903883e-06, "loss": 0.5759, "step": 8749 }, { "epoch": 0.6159802886307638, "grad_norm": 1.901617169380188, "learning_rate": 4.751750104306377e-06, "loss": 0.7688, "step": 8750 }, { "epoch": 0.616050686378036, "grad_norm": 1.8048738241195679, "learning_rate": 4.75023862059742e-06, "loss": 0.5645, "step": 8751 }, { "epoch": 0.616121084125308, "grad_norm": 1.6941165924072266, "learning_rate": 4.748727253855595e-06, "loss": 0.7277, "step": 8752 }, { "epoch": 0.6161914818725801, "grad_norm": 1.8987079858779907, "learning_rate": 4.747216004159476e-06, "loss": 0.639, "step": 8753 }, { "epoch": 0.6162618796198521, "grad_norm": 2.3584744930267334, "learning_rate": 4.7457048715876415e-06, "loss": 0.7146, "step": 8754 }, { "epoch": 0.6163322773671243, "grad_norm": 1.954515814781189, "learning_rate": 4.744193856218649e-06, "loss": 0.7349, "step": 8755 }, { "epoch": 0.6164026751143964, "grad_norm": 1.896977424621582, "learning_rate": 4.742682958131062e-06, "loss": 0.61, "step": 8756 }, { "epoch": 0.6164730728616684, "grad_norm": 1.7407375574111938, "learning_rate": 4.741172177403432e-06, "loss": 0.7351, "step": 8757 }, { "epoch": 0.6165434706089405, "grad_norm": 1.9213669300079346, "learning_rate": 4.739661514114308e-06, "loss": 0.6701, "step": 8758 }, { "epoch": 0.6166138683562126, "grad_norm": 1.7061903476715088, "learning_rate": 4.738150968342225e-06, "loss": 0.7302, "step": 8759 }, { "epoch": 0.6166842661034847, "grad_norm": 1.780179500579834, "learning_rate": 4.736640540165727e-06, "loss": 0.695, "step": 8760 }, { "epoch": 0.6167546638507567, "grad_norm": 1.2619142532348633, "learning_rate": 4.735130229663332e-06, "loss": 0.7406, "step": 8761 }, { "epoch": 0.6168250615980289, "grad_norm": 2.0412163734436035, "learning_rate": 4.733620036913571e-06, "loss": 0.7751, "step": 8762 }, { "epoch": 0.616895459345301, "grad_norm": 1.5057686567306519, "learning_rate": 4.732109961994955e-06, "loss": 0.6874, "step": 8763 }, { "epoch": 0.616965857092573, "grad_norm": 2.337124824523926, "learning_rate": 4.730600004985998e-06, "loss": 0.5928, "step": 8764 }, { "epoch": 0.6170362548398451, "grad_norm": 1.699820637702942, "learning_rate": 4.729090165965198e-06, "loss": 0.4585, "step": 8765 }, { "epoch": 0.6171066525871172, "grad_norm": 1.4827662706375122, "learning_rate": 4.727580445011063e-06, "loss": 0.6378, "step": 8766 }, { "epoch": 0.6171770503343893, "grad_norm": 1.9038209915161133, "learning_rate": 4.726070842202073e-06, "loss": 0.7766, "step": 8767 }, { "epoch": 0.6172474480816614, "grad_norm": 1.7221735715866089, "learning_rate": 4.724561357616723e-06, "loss": 0.6903, "step": 8768 }, { "epoch": 0.6173178458289335, "grad_norm": 1.623178243637085, "learning_rate": 4.723051991333487e-06, "loss": 0.6685, "step": 8769 }, { "epoch": 0.6173882435762056, "grad_norm": 2.0614614486694336, "learning_rate": 4.721542743430841e-06, "loss": 0.7102, "step": 8770 }, { "epoch": 0.6174586413234776, "grad_norm": 1.898468255996704, "learning_rate": 4.720033613987249e-06, "loss": 0.7112, "step": 8771 }, { "epoch": 0.6175290390707497, "grad_norm": 2.0207719802856445, "learning_rate": 4.7185246030811765e-06, "loss": 0.5445, "step": 8772 }, { "epoch": 0.6175994368180219, "grad_norm": 1.5107011795043945, "learning_rate": 4.717015710791075e-06, "loss": 0.6138, "step": 8773 }, { "epoch": 0.6176698345652939, "grad_norm": 1.7142055034637451, "learning_rate": 4.715506937195396e-06, "loss": 0.5899, "step": 8774 }, { "epoch": 0.617740232312566, "grad_norm": 1.8262277841567993, "learning_rate": 4.713998282372579e-06, "loss": 0.5676, "step": 8775 }, { "epoch": 0.617810630059838, "grad_norm": 1.761788249015808, "learning_rate": 4.71248974640106e-06, "loss": 0.6826, "step": 8776 }, { "epoch": 0.6178810278071102, "grad_norm": 2.01226806640625, "learning_rate": 4.710981329359273e-06, "loss": 0.8385, "step": 8777 }, { "epoch": 0.6179514255543822, "grad_norm": 1.9364218711853027, "learning_rate": 4.709473031325639e-06, "loss": 0.7134, "step": 8778 }, { "epoch": 0.6180218233016543, "grad_norm": 1.7863322496414185, "learning_rate": 4.7079648523785785e-06, "loss": 0.7405, "step": 8779 }, { "epoch": 0.6180922210489265, "grad_norm": 1.7984915971755981, "learning_rate": 4.706456792596499e-06, "loss": 0.8367, "step": 8780 }, { "epoch": 0.6181626187961985, "grad_norm": 1.724253535270691, "learning_rate": 4.704948852057809e-06, "loss": 0.6273, "step": 8781 }, { "epoch": 0.6182330165434706, "grad_norm": 1.5767030715942383, "learning_rate": 4.703441030840904e-06, "loss": 0.6819, "step": 8782 }, { "epoch": 0.6183034142907426, "grad_norm": 2.218404769897461, "learning_rate": 4.701933329024183e-06, "loss": 0.6162, "step": 8783 }, { "epoch": 0.6183738120380148, "grad_norm": 1.6901092529296875, "learning_rate": 4.700425746686028e-06, "loss": 0.6148, "step": 8784 }, { "epoch": 0.6184442097852869, "grad_norm": 1.706143856048584, "learning_rate": 4.698918283904822e-06, "loss": 0.518, "step": 8785 }, { "epoch": 0.6185146075325589, "grad_norm": 1.8324304819107056, "learning_rate": 4.697410940758937e-06, "loss": 0.6122, "step": 8786 }, { "epoch": 0.6185850052798311, "grad_norm": 1.75962233543396, "learning_rate": 4.6959037173267455e-06, "loss": 0.8506, "step": 8787 }, { "epoch": 0.6186554030271031, "grad_norm": 1.9706813097000122, "learning_rate": 4.694396613686601e-06, "loss": 0.6771, "step": 8788 }, { "epoch": 0.6187258007743752, "grad_norm": 1.9806902408599854, "learning_rate": 4.69288962991687e-06, "loss": 0.7546, "step": 8789 }, { "epoch": 0.6187961985216474, "grad_norm": 1.6347546577453613, "learning_rate": 4.6913827660958935e-06, "loss": 0.7338, "step": 8790 }, { "epoch": 0.6188665962689194, "grad_norm": 1.9093701839447021, "learning_rate": 4.689876022302021e-06, "loss": 0.6416, "step": 8791 }, { "epoch": 0.6189369940161915, "grad_norm": 2.1019139289855957, "learning_rate": 4.688369398613581e-06, "loss": 0.7337, "step": 8792 }, { "epoch": 0.6190073917634635, "grad_norm": 1.6005642414093018, "learning_rate": 4.6868628951089155e-06, "loss": 0.5845, "step": 8793 }, { "epoch": 0.6190777895107357, "grad_norm": 1.731537938117981, "learning_rate": 4.685356511866338e-06, "loss": 0.6718, "step": 8794 }, { "epoch": 0.6191481872580078, "grad_norm": 2.004443645477295, "learning_rate": 4.683850248964174e-06, "loss": 0.6077, "step": 8795 }, { "epoch": 0.6192185850052798, "grad_norm": 2.191026449203491, "learning_rate": 4.682344106480733e-06, "loss": 0.698, "step": 8796 }, { "epoch": 0.619288982752552, "grad_norm": 1.743820071220398, "learning_rate": 4.680838084494322e-06, "loss": 0.6474, "step": 8797 }, { "epoch": 0.619359380499824, "grad_norm": 1.7968326807022095, "learning_rate": 4.679332183083236e-06, "loss": 0.6957, "step": 8798 }, { "epoch": 0.6194297782470961, "grad_norm": 1.7209839820861816, "learning_rate": 4.677826402325777e-06, "loss": 0.6451, "step": 8799 }, { "epoch": 0.6195001759943681, "grad_norm": 1.6545354127883911, "learning_rate": 4.67632074230022e-06, "loss": 0.5857, "step": 8800 }, { "epoch": 0.6195705737416403, "grad_norm": 1.7954758405685425, "learning_rate": 4.674815203084857e-06, "loss": 0.7472, "step": 8801 }, { "epoch": 0.6196409714889124, "grad_norm": 2.4807910919189453, "learning_rate": 4.673309784757955e-06, "loss": 0.8339, "step": 8802 }, { "epoch": 0.6197113692361844, "grad_norm": 2.9660067558288574, "learning_rate": 4.671804487397785e-06, "loss": 0.6188, "step": 8803 }, { "epoch": 0.6197817669834566, "grad_norm": 1.869012475013733, "learning_rate": 4.6702993110826075e-06, "loss": 0.6832, "step": 8804 }, { "epoch": 0.6198521647307286, "grad_norm": 1.857616662979126, "learning_rate": 4.6687942558906815e-06, "loss": 0.7328, "step": 8805 }, { "epoch": 0.6199225624780007, "grad_norm": 1.7213220596313477, "learning_rate": 4.66728932190025e-06, "loss": 0.6039, "step": 8806 }, { "epoch": 0.6199929602252728, "grad_norm": 1.6209149360656738, "learning_rate": 4.665784509189561e-06, "loss": 0.6038, "step": 8807 }, { "epoch": 0.6200633579725449, "grad_norm": 1.7148208618164062, "learning_rate": 4.664279817836849e-06, "loss": 0.5814, "step": 8808 }, { "epoch": 0.620133755719817, "grad_norm": 1.8417840003967285, "learning_rate": 4.662775247920346e-06, "loss": 0.6002, "step": 8809 }, { "epoch": 0.620204153467089, "grad_norm": 1.8675941228866577, "learning_rate": 4.661270799518272e-06, "loss": 0.7369, "step": 8810 }, { "epoch": 0.6202745512143611, "grad_norm": 2.162395715713501, "learning_rate": 4.65976647270885e-06, "loss": 0.6888, "step": 8811 }, { "epoch": 0.6203449489616333, "grad_norm": 1.7724957466125488, "learning_rate": 4.658262267570286e-06, "loss": 0.6437, "step": 8812 }, { "epoch": 0.6204153467089053, "grad_norm": 1.7854640483856201, "learning_rate": 4.656758184180789e-06, "loss": 0.7124, "step": 8813 }, { "epoch": 0.6204857444561774, "grad_norm": 1.8302967548370361, "learning_rate": 4.6552542226185554e-06, "loss": 0.6763, "step": 8814 }, { "epoch": 0.6205561422034495, "grad_norm": 1.9310803413391113, "learning_rate": 4.653750382961779e-06, "loss": 0.5727, "step": 8815 }, { "epoch": 0.6206265399507216, "grad_norm": 3.0925021171569824, "learning_rate": 4.652246665288642e-06, "loss": 0.7443, "step": 8816 }, { "epoch": 0.6206969376979936, "grad_norm": 1.7223565578460693, "learning_rate": 4.650743069677333e-06, "loss": 0.6793, "step": 8817 }, { "epoch": 0.6207673354452657, "grad_norm": 1.7297208309173584, "learning_rate": 4.649239596206014e-06, "loss": 0.6774, "step": 8818 }, { "epoch": 0.6208377331925379, "grad_norm": 3.186077356338501, "learning_rate": 4.6477362449528606e-06, "loss": 0.6815, "step": 8819 }, { "epoch": 0.6209081309398099, "grad_norm": 1.9918491840362549, "learning_rate": 4.646233015996029e-06, "loss": 0.6736, "step": 8820 }, { "epoch": 0.620978528687082, "grad_norm": 1.8663791418075562, "learning_rate": 4.644729909413675e-06, "loss": 0.5899, "step": 8821 }, { "epoch": 0.621048926434354, "grad_norm": 2.3952815532684326, "learning_rate": 4.643226925283945e-06, "loss": 0.575, "step": 8822 }, { "epoch": 0.6211193241816262, "grad_norm": 2.21958065032959, "learning_rate": 4.6417240636849825e-06, "loss": 0.6627, "step": 8823 }, { "epoch": 0.6211897219288983, "grad_norm": 2.1999599933624268, "learning_rate": 4.640221324694921e-06, "loss": 0.6885, "step": 8824 }, { "epoch": 0.6212601196761703, "grad_norm": 1.8459200859069824, "learning_rate": 4.638718708391891e-06, "loss": 0.6962, "step": 8825 }, { "epoch": 0.6213305174234425, "grad_norm": 2.4423933029174805, "learning_rate": 4.637216214854014e-06, "loss": 0.7715, "step": 8826 }, { "epoch": 0.6214009151707145, "grad_norm": 1.9009371995925903, "learning_rate": 4.635713844159403e-06, "loss": 0.6111, "step": 8827 }, { "epoch": 0.6214713129179866, "grad_norm": 1.6686160564422607, "learning_rate": 4.634211596386174e-06, "loss": 0.7214, "step": 8828 }, { "epoch": 0.6215417106652588, "grad_norm": 1.8055163621902466, "learning_rate": 4.632709471612425e-06, "loss": 0.6183, "step": 8829 }, { "epoch": 0.6216121084125308, "grad_norm": 1.7723884582519531, "learning_rate": 4.6312074699162556e-06, "loss": 0.5887, "step": 8830 }, { "epoch": 0.6216825061598029, "grad_norm": 1.9250534772872925, "learning_rate": 4.6297055913757535e-06, "loss": 0.7889, "step": 8831 }, { "epoch": 0.6217529039070749, "grad_norm": 1.7785691022872925, "learning_rate": 4.628203836069007e-06, "loss": 0.6983, "step": 8832 }, { "epoch": 0.6218233016543471, "grad_norm": 2.0277578830718994, "learning_rate": 4.626702204074087e-06, "loss": 0.6828, "step": 8833 }, { "epoch": 0.6218936994016191, "grad_norm": 1.9140089750289917, "learning_rate": 4.625200695469072e-06, "loss": 0.6877, "step": 8834 }, { "epoch": 0.6219640971488912, "grad_norm": 2.4136857986450195, "learning_rate": 4.623699310332023e-06, "loss": 0.6652, "step": 8835 }, { "epoch": 0.6220344948961634, "grad_norm": 1.936915397644043, "learning_rate": 4.622198048741e-06, "loss": 0.6755, "step": 8836 }, { "epoch": 0.6221048926434354, "grad_norm": 1.710368037223816, "learning_rate": 4.620696910774052e-06, "loss": 0.6381, "step": 8837 }, { "epoch": 0.6221752903907075, "grad_norm": 1.828612208366394, "learning_rate": 4.619195896509229e-06, "loss": 0.6742, "step": 8838 }, { "epoch": 0.6222456881379795, "grad_norm": 1.6009294986724854, "learning_rate": 4.617695006024563e-06, "loss": 0.6826, "step": 8839 }, { "epoch": 0.6223160858852517, "grad_norm": 1.8125178813934326, "learning_rate": 4.616194239398094e-06, "loss": 0.7334, "step": 8840 }, { "epoch": 0.6223864836325238, "grad_norm": 2.1499056816101074, "learning_rate": 4.614693596707844e-06, "loss": 0.7535, "step": 8841 }, { "epoch": 0.6224568813797958, "grad_norm": 1.9501922130584717, "learning_rate": 4.613193078031838e-06, "loss": 0.7755, "step": 8842 }, { "epoch": 0.622527279127068, "grad_norm": 1.7487584352493286, "learning_rate": 4.611692683448082e-06, "loss": 0.6057, "step": 8843 }, { "epoch": 0.62259767687434, "grad_norm": 1.7857738733291626, "learning_rate": 4.610192413034588e-06, "loss": 0.6407, "step": 8844 }, { "epoch": 0.6226680746216121, "grad_norm": 1.91888427734375, "learning_rate": 4.6086922668693515e-06, "loss": 0.627, "step": 8845 }, { "epoch": 0.6227384723688842, "grad_norm": 1.6778323650360107, "learning_rate": 4.607192245030374e-06, "loss": 0.7498, "step": 8846 }, { "epoch": 0.6228088701161563, "grad_norm": 1.9219059944152832, "learning_rate": 4.605692347595637e-06, "loss": 0.746, "step": 8847 }, { "epoch": 0.6228792678634284, "grad_norm": 2.783186674118042, "learning_rate": 4.604192574643124e-06, "loss": 0.6492, "step": 8848 }, { "epoch": 0.6229496656107004, "grad_norm": 1.661230206489563, "learning_rate": 4.6026929262508056e-06, "loss": 0.7021, "step": 8849 }, { "epoch": 0.6230200633579726, "grad_norm": 1.8668944835662842, "learning_rate": 4.601193402496658e-06, "loss": 0.8035, "step": 8850 }, { "epoch": 0.6230904611052447, "grad_norm": 1.5594483613967896, "learning_rate": 4.599694003458634e-06, "loss": 0.5987, "step": 8851 }, { "epoch": 0.6231608588525167, "grad_norm": 1.7366489171981812, "learning_rate": 4.598194729214695e-06, "loss": 0.6074, "step": 8852 }, { "epoch": 0.6232312565997888, "grad_norm": 2.0236361026763916, "learning_rate": 4.596695579842786e-06, "loss": 0.6175, "step": 8853 }, { "epoch": 0.6233016543470609, "grad_norm": 1.911348819732666, "learning_rate": 4.595196555420852e-06, "loss": 0.6882, "step": 8854 }, { "epoch": 0.623372052094333, "grad_norm": 1.687758207321167, "learning_rate": 4.5936976560268235e-06, "loss": 0.7294, "step": 8855 }, { "epoch": 0.623442449841605, "grad_norm": 1.4993699789047241, "learning_rate": 4.592198881738638e-06, "loss": 0.6791, "step": 8856 }, { "epoch": 0.6235128475888772, "grad_norm": 1.8500275611877441, "learning_rate": 4.590700232634209e-06, "loss": 0.6053, "step": 8857 }, { "epoch": 0.6235832453361493, "grad_norm": 1.473334789276123, "learning_rate": 4.589201708791458e-06, "loss": 0.6543, "step": 8858 }, { "epoch": 0.6236536430834213, "grad_norm": 1.7584030628204346, "learning_rate": 4.587703310288294e-06, "loss": 0.6251, "step": 8859 }, { "epoch": 0.6237240408306934, "grad_norm": 1.8502508401870728, "learning_rate": 4.58620503720262e-06, "loss": 0.6141, "step": 8860 }, { "epoch": 0.6237944385779655, "grad_norm": 1.4718788862228394, "learning_rate": 4.5847068896123305e-06, "loss": 0.5937, "step": 8861 }, { "epoch": 0.6238648363252376, "grad_norm": 1.9724866151809692, "learning_rate": 4.583208867595321e-06, "loss": 0.6649, "step": 8862 }, { "epoch": 0.6239352340725097, "grad_norm": 1.8651230335235596, "learning_rate": 4.581710971229467e-06, "loss": 0.8245, "step": 8863 }, { "epoch": 0.6240056318197817, "grad_norm": 1.6077384948730469, "learning_rate": 4.580213200592652e-06, "loss": 0.6449, "step": 8864 }, { "epoch": 0.6240760295670539, "grad_norm": 1.6055594682693481, "learning_rate": 4.5787155557627435e-06, "loss": 0.6774, "step": 8865 }, { "epoch": 0.6241464273143259, "grad_norm": 1.8255889415740967, "learning_rate": 4.577218036817607e-06, "loss": 0.6678, "step": 8866 }, { "epoch": 0.624216825061598, "grad_norm": 1.8783512115478516, "learning_rate": 4.575720643835097e-06, "loss": 0.6812, "step": 8867 }, { "epoch": 0.6242872228088702, "grad_norm": 1.6803581714630127, "learning_rate": 4.5742233768930695e-06, "loss": 0.6624, "step": 8868 }, { "epoch": 0.6243576205561422, "grad_norm": 1.6195125579833984, "learning_rate": 4.572726236069361e-06, "loss": 0.6215, "step": 8869 }, { "epoch": 0.6244280183034143, "grad_norm": 1.97798752784729, "learning_rate": 4.571229221441818e-06, "loss": 0.6342, "step": 8870 }, { "epoch": 0.6244984160506863, "grad_norm": 1.5793315172195435, "learning_rate": 4.569732333088266e-06, "loss": 0.5222, "step": 8871 }, { "epoch": 0.6245688137979585, "grad_norm": 1.7505803108215332, "learning_rate": 4.5682355710865305e-06, "loss": 0.6465, "step": 8872 }, { "epoch": 0.6246392115452305, "grad_norm": 2.1643571853637695, "learning_rate": 4.566738935514429e-06, "loss": 0.6588, "step": 8873 }, { "epoch": 0.6247096092925026, "grad_norm": 1.9295819997787476, "learning_rate": 4.5652424264497765e-06, "loss": 0.7442, "step": 8874 }, { "epoch": 0.6247800070397748, "grad_norm": 1.8487359285354614, "learning_rate": 4.563746043970372e-06, "loss": 0.7059, "step": 8875 }, { "epoch": 0.6248504047870468, "grad_norm": 1.921670913696289, "learning_rate": 4.562249788154019e-06, "loss": 0.6999, "step": 8876 }, { "epoch": 0.6249208025343189, "grad_norm": 1.9929125308990479, "learning_rate": 4.560753659078508e-06, "loss": 0.6738, "step": 8877 }, { "epoch": 0.6249912002815909, "grad_norm": 1.8091833591461182, "learning_rate": 4.559257656821621e-06, "loss": 0.6671, "step": 8878 }, { "epoch": 0.6250615980288631, "grad_norm": 1.6576651334762573, "learning_rate": 4.557761781461142e-06, "loss": 0.7232, "step": 8879 }, { "epoch": 0.6251319957761352, "grad_norm": 1.9018535614013672, "learning_rate": 4.556266033074838e-06, "loss": 0.6798, "step": 8880 }, { "epoch": 0.6252023935234072, "grad_norm": 1.8656281232833862, "learning_rate": 4.554770411740477e-06, "loss": 0.7031, "step": 8881 }, { "epoch": 0.6252727912706794, "grad_norm": 2.029564380645752, "learning_rate": 4.553274917535816e-06, "loss": 0.6956, "step": 8882 }, { "epoch": 0.6253431890179514, "grad_norm": 1.7999827861785889, "learning_rate": 4.551779550538609e-06, "loss": 0.7467, "step": 8883 }, { "epoch": 0.6254135867652235, "grad_norm": 1.668978214263916, "learning_rate": 4.550284310826597e-06, "loss": 0.6233, "step": 8884 }, { "epoch": 0.6254839845124956, "grad_norm": 1.926210880279541, "learning_rate": 4.548789198477526e-06, "loss": 0.6119, "step": 8885 }, { "epoch": 0.6255543822597677, "grad_norm": 1.9455151557922363, "learning_rate": 4.547294213569124e-06, "loss": 0.6351, "step": 8886 }, { "epoch": 0.6256247800070398, "grad_norm": 1.8759804964065552, "learning_rate": 4.5457993561791165e-06, "loss": 0.5917, "step": 8887 }, { "epoch": 0.6256951777543118, "grad_norm": 1.9093433618545532, "learning_rate": 4.544304626385223e-06, "loss": 0.7414, "step": 8888 }, { "epoch": 0.625765575501584, "grad_norm": 1.7155686616897583, "learning_rate": 4.542810024265157e-06, "loss": 0.719, "step": 8889 }, { "epoch": 0.625835973248856, "grad_norm": 1.9360209703445435, "learning_rate": 4.54131554989662e-06, "loss": 0.6376, "step": 8890 }, { "epoch": 0.6259063709961281, "grad_norm": 2.2942371368408203, "learning_rate": 4.539821203357318e-06, "loss": 0.7347, "step": 8891 }, { "epoch": 0.6259767687434002, "grad_norm": 1.882985234260559, "learning_rate": 4.538326984724938e-06, "loss": 0.7137, "step": 8892 }, { "epoch": 0.6260471664906723, "grad_norm": 1.812325119972229, "learning_rate": 4.5368328940771685e-06, "loss": 0.743, "step": 8893 }, { "epoch": 0.6261175642379444, "grad_norm": 1.7374353408813477, "learning_rate": 4.535338931491687e-06, "loss": 0.6531, "step": 8894 }, { "epoch": 0.6261879619852164, "grad_norm": 1.9901626110076904, "learning_rate": 4.533845097046168e-06, "loss": 0.6075, "step": 8895 }, { "epoch": 0.6262583597324886, "grad_norm": 1.9554730653762817, "learning_rate": 4.532351390818272e-06, "loss": 0.7893, "step": 8896 }, { "epoch": 0.6263287574797607, "grad_norm": 1.9355320930480957, "learning_rate": 4.530857812885665e-06, "loss": 0.6227, "step": 8897 }, { "epoch": 0.6263991552270327, "grad_norm": 1.94437837600708, "learning_rate": 4.529364363325996e-06, "loss": 0.7253, "step": 8898 }, { "epoch": 0.6264695529743048, "grad_norm": 1.732561707496643, "learning_rate": 4.5278710422169125e-06, "loss": 0.6789, "step": 8899 }, { "epoch": 0.6265399507215769, "grad_norm": 1.4828764200210571, "learning_rate": 4.526377849636049e-06, "loss": 0.6339, "step": 8900 }, { "epoch": 0.626610348468849, "grad_norm": 1.6962684392929077, "learning_rate": 4.524884785661046e-06, "loss": 0.6684, "step": 8901 }, { "epoch": 0.6266807462161211, "grad_norm": 2.6839118003845215, "learning_rate": 4.5233918503695194e-06, "loss": 0.8658, "step": 8902 }, { "epoch": 0.6267511439633932, "grad_norm": 1.760019063949585, "learning_rate": 4.521899043839098e-06, "loss": 0.6916, "step": 8903 }, { "epoch": 0.6268215417106653, "grad_norm": 1.9874176979064941, "learning_rate": 4.520406366147387e-06, "loss": 0.6286, "step": 8904 }, { "epoch": 0.6268919394579373, "grad_norm": 1.7570241689682007, "learning_rate": 4.518913817371996e-06, "loss": 0.6611, "step": 8905 }, { "epoch": 0.6269623372052094, "grad_norm": 2.332871675491333, "learning_rate": 4.517421397590521e-06, "loss": 0.6747, "step": 8906 }, { "epoch": 0.6270327349524816, "grad_norm": 1.9562926292419434, "learning_rate": 4.515929106880558e-06, "loss": 0.6989, "step": 8907 }, { "epoch": 0.6271031326997536, "grad_norm": 3.8486785888671875, "learning_rate": 4.514436945319688e-06, "loss": 0.7272, "step": 8908 }, { "epoch": 0.6271735304470257, "grad_norm": 1.870395302772522, "learning_rate": 4.512944912985495e-06, "loss": 0.6484, "step": 8909 }, { "epoch": 0.6272439281942978, "grad_norm": 1.8952008485794067, "learning_rate": 4.5114530099555455e-06, "loss": 0.771, "step": 8910 }, { "epoch": 0.6273143259415699, "grad_norm": 2.2265756130218506, "learning_rate": 4.509961236307411e-06, "loss": 0.6242, "step": 8911 }, { "epoch": 0.6273847236888419, "grad_norm": 1.8128106594085693, "learning_rate": 4.508469592118643e-06, "loss": 0.5825, "step": 8912 }, { "epoch": 0.627455121436114, "grad_norm": 1.7894340753555298, "learning_rate": 4.506978077466803e-06, "loss": 0.6756, "step": 8913 }, { "epoch": 0.6275255191833862, "grad_norm": 1.820526361465454, "learning_rate": 4.5054866924294244e-06, "loss": 0.7182, "step": 8914 }, { "epoch": 0.6275959169306582, "grad_norm": 1.7979587316513062, "learning_rate": 4.503995437084055e-06, "loss": 0.6916, "step": 8915 }, { "epoch": 0.6276663146779303, "grad_norm": 2.0242249965667725, "learning_rate": 4.502504311508223e-06, "loss": 0.6881, "step": 8916 }, { "epoch": 0.6277367124252023, "grad_norm": 2.0753743648529053, "learning_rate": 4.501013315779455e-06, "loss": 0.7936, "step": 8917 }, { "epoch": 0.6278071101724745, "grad_norm": 1.6494354009628296, "learning_rate": 4.499522449975264e-06, "loss": 0.7209, "step": 8918 }, { "epoch": 0.6278775079197466, "grad_norm": 1.8652344942092896, "learning_rate": 4.498031714173171e-06, "loss": 0.6712, "step": 8919 }, { "epoch": 0.6279479056670186, "grad_norm": 1.961323857307434, "learning_rate": 4.49654110845067e-06, "loss": 0.6701, "step": 8920 }, { "epoch": 0.6280183034142908, "grad_norm": 2.0352137088775635, "learning_rate": 4.495050632885267e-06, "loss": 0.8538, "step": 8921 }, { "epoch": 0.6280887011615628, "grad_norm": 1.7597757577896118, "learning_rate": 4.493560287554449e-06, "loss": 0.6143, "step": 8922 }, { "epoch": 0.6281590989088349, "grad_norm": 1.666231632232666, "learning_rate": 4.492070072535703e-06, "loss": 0.6356, "step": 8923 }, { "epoch": 0.6282294966561071, "grad_norm": 1.6170204877853394, "learning_rate": 4.4905799879065025e-06, "loss": 0.6592, "step": 8924 }, { "epoch": 0.6282998944033791, "grad_norm": 2.912653684616089, "learning_rate": 4.489090033744325e-06, "loss": 0.7391, "step": 8925 }, { "epoch": 0.6283702921506512, "grad_norm": 1.74045729637146, "learning_rate": 4.487600210126627e-06, "loss": 0.7093, "step": 8926 }, { "epoch": 0.6284406898979232, "grad_norm": 2.0052804946899414, "learning_rate": 4.48611051713087e-06, "loss": 0.6913, "step": 8927 }, { "epoch": 0.6285110876451954, "grad_norm": 1.8702497482299805, "learning_rate": 4.484620954834505e-06, "loss": 0.6438, "step": 8928 }, { "epoch": 0.6285814853924674, "grad_norm": 2.2539100646972656, "learning_rate": 4.483131523314974e-06, "loss": 0.7963, "step": 8929 }, { "epoch": 0.6286518831397395, "grad_norm": 1.5418449640274048, "learning_rate": 4.481642222649715e-06, "loss": 0.5593, "step": 8930 }, { "epoch": 0.6287222808870117, "grad_norm": 2.1155731678009033, "learning_rate": 4.480153052916157e-06, "loss": 0.7637, "step": 8931 }, { "epoch": 0.6287926786342837, "grad_norm": 1.8812837600708008, "learning_rate": 4.478664014191725e-06, "loss": 0.7016, "step": 8932 }, { "epoch": 0.6288630763815558, "grad_norm": 1.7704731225967407, "learning_rate": 4.4771751065538336e-06, "loss": 0.661, "step": 8933 }, { "epoch": 0.6289334741288278, "grad_norm": 2.163632392883301, "learning_rate": 4.475686330079894e-06, "loss": 0.6843, "step": 8934 }, { "epoch": 0.6290038718761, "grad_norm": 2.2507331371307373, "learning_rate": 4.4741976848473065e-06, "loss": 0.6589, "step": 8935 }, { "epoch": 0.6290742696233721, "grad_norm": 2.0214552879333496, "learning_rate": 4.4727091709334705e-06, "loss": 0.7158, "step": 8936 }, { "epoch": 0.6291446673706441, "grad_norm": 2.1184444427490234, "learning_rate": 4.471220788415772e-06, "loss": 0.6321, "step": 8937 }, { "epoch": 0.6292150651179163, "grad_norm": 1.9412455558776855, "learning_rate": 4.469732537371597e-06, "loss": 0.7002, "step": 8938 }, { "epoch": 0.6292854628651883, "grad_norm": 1.798052430152893, "learning_rate": 4.4682444178783185e-06, "loss": 0.6418, "step": 8939 }, { "epoch": 0.6293558606124604, "grad_norm": 3.0892040729522705, "learning_rate": 4.466756430013306e-06, "loss": 0.6337, "step": 8940 }, { "epoch": 0.6294262583597325, "grad_norm": 1.5022046566009521, "learning_rate": 4.465268573853918e-06, "loss": 0.6904, "step": 8941 }, { "epoch": 0.6294966561070046, "grad_norm": 1.737906575202942, "learning_rate": 4.4637808494775155e-06, "loss": 0.4896, "step": 8942 }, { "epoch": 0.6295670538542767, "grad_norm": 1.8464139699935913, "learning_rate": 4.462293256961441e-06, "loss": 0.6529, "step": 8943 }, { "epoch": 0.6296374516015487, "grad_norm": 1.980373501777649, "learning_rate": 4.460805796383042e-06, "loss": 0.8006, "step": 8944 }, { "epoch": 0.6297078493488208, "grad_norm": 1.601212501525879, "learning_rate": 4.459318467819647e-06, "loss": 0.741, "step": 8945 }, { "epoch": 0.629778247096093, "grad_norm": 1.6373995542526245, "learning_rate": 4.457831271348586e-06, "loss": 0.6357, "step": 8946 }, { "epoch": 0.629848644843365, "grad_norm": 1.6908868551254272, "learning_rate": 4.456344207047178e-06, "loss": 0.6065, "step": 8947 }, { "epoch": 0.6299190425906371, "grad_norm": 1.9284040927886963, "learning_rate": 4.4548572749927405e-06, "loss": 0.6727, "step": 8948 }, { "epoch": 0.6299894403379092, "grad_norm": 1.7732658386230469, "learning_rate": 4.453370475262577e-06, "loss": 0.6713, "step": 8949 }, { "epoch": 0.6300598380851813, "grad_norm": 2.1013684272766113, "learning_rate": 4.451883807933989e-06, "loss": 0.711, "step": 8950 }, { "epoch": 0.6301302358324533, "grad_norm": 1.6525382995605469, "learning_rate": 4.450397273084269e-06, "loss": 0.5905, "step": 8951 }, { "epoch": 0.6302006335797254, "grad_norm": 1.8177704811096191, "learning_rate": 4.4489108707907075e-06, "loss": 0.619, "step": 8952 }, { "epoch": 0.6302710313269976, "grad_norm": 1.7273306846618652, "learning_rate": 4.447424601130576e-06, "loss": 0.7432, "step": 8953 }, { "epoch": 0.6303414290742696, "grad_norm": 1.7753610610961914, "learning_rate": 4.445938464181154e-06, "loss": 0.6562, "step": 8954 }, { "epoch": 0.6304118268215417, "grad_norm": 1.9042919874191284, "learning_rate": 4.444452460019702e-06, "loss": 0.6993, "step": 8955 }, { "epoch": 0.6304822245688138, "grad_norm": 1.5312023162841797, "learning_rate": 4.442966588723483e-06, "loss": 0.611, "step": 8956 }, { "epoch": 0.6305526223160859, "grad_norm": 2.3948497772216797, "learning_rate": 4.441480850369745e-06, "loss": 0.6585, "step": 8957 }, { "epoch": 0.630623020063358, "grad_norm": 2.1842124462127686, "learning_rate": 4.4399952450357374e-06, "loss": 0.7573, "step": 8958 }, { "epoch": 0.63069341781063, "grad_norm": 1.6437325477600098, "learning_rate": 4.438509772798691e-06, "loss": 0.641, "step": 8959 }, { "epoch": 0.6307638155579022, "grad_norm": 1.9346373081207275, "learning_rate": 4.437024433735846e-06, "loss": 0.7149, "step": 8960 }, { "epoch": 0.6308342133051742, "grad_norm": 1.7528678178787231, "learning_rate": 4.435539227924419e-06, "loss": 0.5203, "step": 8961 }, { "epoch": 0.6309046110524463, "grad_norm": 2.01584792137146, "learning_rate": 4.434054155441632e-06, "loss": 0.6299, "step": 8962 }, { "epoch": 0.6309750087997185, "grad_norm": 1.8728141784667969, "learning_rate": 4.432569216364691e-06, "loss": 0.7016, "step": 8963 }, { "epoch": 0.6310454065469905, "grad_norm": 1.567643165588379, "learning_rate": 4.431084410770806e-06, "loss": 0.7043, "step": 8964 }, { "epoch": 0.6311158042942626, "grad_norm": 1.826465129852295, "learning_rate": 4.429599738737164e-06, "loss": 0.734, "step": 8965 }, { "epoch": 0.6311862020415346, "grad_norm": 1.803178310394287, "learning_rate": 4.428115200340961e-06, "loss": 0.7355, "step": 8966 }, { "epoch": 0.6312565997888068, "grad_norm": 1.636277675628662, "learning_rate": 4.426630795659379e-06, "loss": 0.8202, "step": 8967 }, { "epoch": 0.6313269975360788, "grad_norm": 1.6653649806976318, "learning_rate": 4.425146524769592e-06, "loss": 0.6007, "step": 8968 }, { "epoch": 0.6313973952833509, "grad_norm": 1.779578685760498, "learning_rate": 4.4236623877487675e-06, "loss": 0.727, "step": 8969 }, { "epoch": 0.6314677930306231, "grad_norm": 1.9371278285980225, "learning_rate": 4.422178384674073e-06, "loss": 0.6408, "step": 8970 }, { "epoch": 0.6315381907778951, "grad_norm": 1.7691351175308228, "learning_rate": 4.420694515622654e-06, "loss": 0.7197, "step": 8971 }, { "epoch": 0.6316085885251672, "grad_norm": 1.7146881818771362, "learning_rate": 4.419210780671667e-06, "loss": 0.7101, "step": 8972 }, { "epoch": 0.6316789862724392, "grad_norm": 2.0774588584899902, "learning_rate": 4.417727179898247e-06, "loss": 0.6653, "step": 8973 }, { "epoch": 0.6317493840197114, "grad_norm": 1.8786152601242065, "learning_rate": 4.41624371337953e-06, "loss": 0.625, "step": 8974 }, { "epoch": 0.6318197817669835, "grad_norm": 2.0342841148376465, "learning_rate": 4.4147603811926405e-06, "loss": 0.6363, "step": 8975 }, { "epoch": 0.6318901795142555, "grad_norm": 1.5877940654754639, "learning_rate": 4.413277183414701e-06, "loss": 0.6009, "step": 8976 }, { "epoch": 0.6319605772615277, "grad_norm": 1.3998557329177856, "learning_rate": 4.4117941201228245e-06, "loss": 0.5598, "step": 8977 }, { "epoch": 0.6320309750087997, "grad_norm": 1.6115599870681763, "learning_rate": 4.4103111913941155e-06, "loss": 0.7105, "step": 8978 }, { "epoch": 0.6321013727560718, "grad_norm": 1.8306970596313477, "learning_rate": 4.4088283973056735e-06, "loss": 0.7344, "step": 8979 }, { "epoch": 0.6321717705033439, "grad_norm": 1.734236240386963, "learning_rate": 4.407345737934588e-06, "loss": 0.5916, "step": 8980 }, { "epoch": 0.632242168250616, "grad_norm": 1.5135917663574219, "learning_rate": 4.405863213357947e-06, "loss": 0.7799, "step": 8981 }, { "epoch": 0.6323125659978881, "grad_norm": 1.5535550117492676, "learning_rate": 4.404380823652826e-06, "loss": 0.6611, "step": 8982 }, { "epoch": 0.6323829637451601, "grad_norm": 1.7427586317062378, "learning_rate": 4.402898568896299e-06, "loss": 0.487, "step": 8983 }, { "epoch": 0.6324533614924323, "grad_norm": 1.7351548671722412, "learning_rate": 4.401416449165425e-06, "loss": 0.7259, "step": 8984 }, { "epoch": 0.6325237592397043, "grad_norm": 2.4127249717712402, "learning_rate": 4.399934464537265e-06, "loss": 0.7635, "step": 8985 }, { "epoch": 0.6325941569869764, "grad_norm": 2.120973587036133, "learning_rate": 4.398452615088864e-06, "loss": 0.7, "step": 8986 }, { "epoch": 0.6326645547342485, "grad_norm": 2.205505847930908, "learning_rate": 4.396970900897271e-06, "loss": 0.6961, "step": 8987 }, { "epoch": 0.6327349524815206, "grad_norm": 1.8597545623779297, "learning_rate": 4.395489322039516e-06, "loss": 0.7071, "step": 8988 }, { "epoch": 0.6328053502287927, "grad_norm": 1.565760850906372, "learning_rate": 4.394007878592632e-06, "loss": 0.6249, "step": 8989 }, { "epoch": 0.6328757479760647, "grad_norm": 1.8781038522720337, "learning_rate": 4.392526570633637e-06, "loss": 0.7174, "step": 8990 }, { "epoch": 0.6329461457233369, "grad_norm": 1.95187509059906, "learning_rate": 4.391045398239548e-06, "loss": 0.7089, "step": 8991 }, { "epoch": 0.633016543470609, "grad_norm": 1.5092695951461792, "learning_rate": 4.389564361487369e-06, "loss": 0.6384, "step": 8992 }, { "epoch": 0.633086941217881, "grad_norm": 1.75419020652771, "learning_rate": 4.388083460454106e-06, "loss": 0.5345, "step": 8993 }, { "epoch": 0.6331573389651531, "grad_norm": 1.936296820640564, "learning_rate": 4.386602695216749e-06, "loss": 0.7201, "step": 8994 }, { "epoch": 0.6332277367124252, "grad_norm": 1.8954877853393555, "learning_rate": 4.385122065852285e-06, "loss": 0.6719, "step": 8995 }, { "epoch": 0.6332981344596973, "grad_norm": 1.8082013130187988, "learning_rate": 4.383641572437691e-06, "loss": 0.7394, "step": 8996 }, { "epoch": 0.6333685322069694, "grad_norm": 2.103165626525879, "learning_rate": 4.382161215049943e-06, "loss": 0.6806, "step": 8997 }, { "epoch": 0.6334389299542414, "grad_norm": 1.5947649478912354, "learning_rate": 4.380680993766002e-06, "loss": 0.5284, "step": 8998 }, { "epoch": 0.6335093277015136, "grad_norm": 1.7913581132888794, "learning_rate": 4.379200908662831e-06, "loss": 0.5852, "step": 8999 }, { "epoch": 0.6335797254487856, "grad_norm": 2.014082193374634, "learning_rate": 4.3777209598173766e-06, "loss": 0.727, "step": 9000 }, { "epoch": 0.6336501231960577, "grad_norm": 1.5731520652770996, "learning_rate": 4.376241147306586e-06, "loss": 0.7344, "step": 9001 }, { "epoch": 0.6337205209433299, "grad_norm": 2.0209779739379883, "learning_rate": 4.374761471207392e-06, "loss": 0.7788, "step": 9002 }, { "epoch": 0.6337909186906019, "grad_norm": 1.631135106086731, "learning_rate": 4.3732819315967295e-06, "loss": 0.6128, "step": 9003 }, { "epoch": 0.633861316437874, "grad_norm": 1.6738003492355347, "learning_rate": 4.371802528551515e-06, "loss": 0.6691, "step": 9004 }, { "epoch": 0.633931714185146, "grad_norm": 1.9799883365631104, "learning_rate": 4.37032326214867e-06, "loss": 0.6484, "step": 9005 }, { "epoch": 0.6340021119324182, "grad_norm": 1.7932630777359009, "learning_rate": 4.368844132465098e-06, "loss": 0.6074, "step": 9006 }, { "epoch": 0.6340725096796902, "grad_norm": 1.7643979787826538, "learning_rate": 4.367365139577706e-06, "loss": 0.6284, "step": 9007 }, { "epoch": 0.6341429074269623, "grad_norm": 1.8642879724502563, "learning_rate": 4.36588628356338e-06, "loss": 0.7349, "step": 9008 }, { "epoch": 0.6342133051742345, "grad_norm": 1.5627418756484985, "learning_rate": 4.364407564499016e-06, "loss": 0.6418, "step": 9009 }, { "epoch": 0.6342837029215065, "grad_norm": 2.265058755874634, "learning_rate": 4.362928982461485e-06, "loss": 0.5617, "step": 9010 }, { "epoch": 0.6343541006687786, "grad_norm": 1.9182465076446533, "learning_rate": 4.361450537527669e-06, "loss": 0.6809, "step": 9011 }, { "epoch": 0.6344244984160506, "grad_norm": 1.69278085231781, "learning_rate": 4.359972229774427e-06, "loss": 0.6218, "step": 9012 }, { "epoch": 0.6344948961633228, "grad_norm": 2.5391154289245605, "learning_rate": 4.35849405927862e-06, "loss": 0.6638, "step": 9013 }, { "epoch": 0.6345652939105949, "grad_norm": 1.84584641456604, "learning_rate": 4.3570160261170975e-06, "loss": 0.6773, "step": 9014 }, { "epoch": 0.6346356916578669, "grad_norm": 1.794948697090149, "learning_rate": 4.35553813036671e-06, "loss": 0.6689, "step": 9015 }, { "epoch": 0.6347060894051391, "grad_norm": 2.4154655933380127, "learning_rate": 4.354060372104286e-06, "loss": 0.6186, "step": 9016 }, { "epoch": 0.6347764871524111, "grad_norm": 1.862809419631958, "learning_rate": 4.352582751406661e-06, "loss": 0.7342, "step": 9017 }, { "epoch": 0.6348468848996832, "grad_norm": 1.8163776397705078, "learning_rate": 4.351105268350656e-06, "loss": 0.6727, "step": 9018 }, { "epoch": 0.6349172826469553, "grad_norm": 1.8684730529785156, "learning_rate": 4.349627923013088e-06, "loss": 0.785, "step": 9019 }, { "epoch": 0.6349876803942274, "grad_norm": 1.6635981798171997, "learning_rate": 4.348150715470762e-06, "loss": 0.6325, "step": 9020 }, { "epoch": 0.6350580781414995, "grad_norm": 1.97501540184021, "learning_rate": 4.346673645800485e-06, "loss": 0.733, "step": 9021 }, { "epoch": 0.6351284758887715, "grad_norm": 1.9633455276489258, "learning_rate": 4.345196714079046e-06, "loss": 0.7586, "step": 9022 }, { "epoch": 0.6351988736360437, "grad_norm": 1.9288759231567383, "learning_rate": 4.3437199203832365e-06, "loss": 0.6283, "step": 9023 }, { "epoch": 0.6352692713833157, "grad_norm": 1.7680613994598389, "learning_rate": 4.342243264789831e-06, "loss": 0.6474, "step": 9024 }, { "epoch": 0.6353396691305878, "grad_norm": 1.6070221662521362, "learning_rate": 4.340766747375608e-06, "loss": 0.707, "step": 9025 }, { "epoch": 0.63541006687786, "grad_norm": 1.8282153606414795, "learning_rate": 4.339290368217327e-06, "loss": 0.6594, "step": 9026 }, { "epoch": 0.635480464625132, "grad_norm": 1.8746263980865479, "learning_rate": 4.337814127391751e-06, "loss": 0.6546, "step": 9027 }, { "epoch": 0.6355508623724041, "grad_norm": 1.964434266090393, "learning_rate": 4.336338024975631e-06, "loss": 0.7449, "step": 9028 }, { "epoch": 0.6356212601196761, "grad_norm": 1.835306167602539, "learning_rate": 4.3348620610457086e-06, "loss": 0.6509, "step": 9029 }, { "epoch": 0.6356916578669483, "grad_norm": 1.9925087690353394, "learning_rate": 4.333386235678722e-06, "loss": 0.6753, "step": 9030 }, { "epoch": 0.6357620556142204, "grad_norm": 1.790116310119629, "learning_rate": 4.3319105489513964e-06, "loss": 0.6791, "step": 9031 }, { "epoch": 0.6358324533614924, "grad_norm": 1.5501362085342407, "learning_rate": 4.3304350009404615e-06, "loss": 0.6163, "step": 9032 }, { "epoch": 0.6359028511087645, "grad_norm": 1.752990484237671, "learning_rate": 4.328959591722627e-06, "loss": 0.636, "step": 9033 }, { "epoch": 0.6359732488560366, "grad_norm": 1.6951661109924316, "learning_rate": 4.3274843213746036e-06, "loss": 0.6688, "step": 9034 }, { "epoch": 0.6360436466033087, "grad_norm": 2.177689552307129, "learning_rate": 4.32600918997309e-06, "loss": 0.7165, "step": 9035 }, { "epoch": 0.6361140443505808, "grad_norm": 1.7550839185714722, "learning_rate": 4.324534197594781e-06, "loss": 0.7021, "step": 9036 }, { "epoch": 0.6361844420978529, "grad_norm": 1.8532358407974243, "learning_rate": 4.32305934431636e-06, "loss": 0.729, "step": 9037 }, { "epoch": 0.636254839845125, "grad_norm": 1.5242801904678345, "learning_rate": 4.321584630214511e-06, "loss": 0.6701, "step": 9038 }, { "epoch": 0.636325237592397, "grad_norm": 2.0381124019622803, "learning_rate": 4.3201100553659e-06, "loss": 0.6717, "step": 9039 }, { "epoch": 0.6363956353396691, "grad_norm": 1.5003957748413086, "learning_rate": 4.318635619847197e-06, "loss": 0.6449, "step": 9040 }, { "epoch": 0.6364660330869412, "grad_norm": 1.5939546823501587, "learning_rate": 4.317161323735055e-06, "loss": 0.7402, "step": 9041 }, { "epoch": 0.6365364308342133, "grad_norm": 1.588822603225708, "learning_rate": 4.315687167106125e-06, "loss": 0.7506, "step": 9042 }, { "epoch": 0.6366068285814854, "grad_norm": 2.5343167781829834, "learning_rate": 4.31421315003705e-06, "loss": 0.7562, "step": 9043 }, { "epoch": 0.6366772263287575, "grad_norm": 1.8913538455963135, "learning_rate": 4.312739272604468e-06, "loss": 0.6563, "step": 9044 }, { "epoch": 0.6367476240760296, "grad_norm": 1.7115387916564941, "learning_rate": 4.311265534885002e-06, "loss": 0.7566, "step": 9045 }, { "epoch": 0.6368180218233016, "grad_norm": 2.016685724258423, "learning_rate": 4.309791936955279e-06, "loss": 0.7746, "step": 9046 }, { "epoch": 0.6368884195705737, "grad_norm": 1.7232788801193237, "learning_rate": 4.308318478891908e-06, "loss": 0.7338, "step": 9047 }, { "epoch": 0.6369588173178459, "grad_norm": 2.094759702682495, "learning_rate": 4.306845160771497e-06, "loss": 0.7257, "step": 9048 }, { "epoch": 0.6370292150651179, "grad_norm": 1.5716664791107178, "learning_rate": 4.305371982670643e-06, "loss": 0.6351, "step": 9049 }, { "epoch": 0.63709961281239, "grad_norm": 1.822417140007019, "learning_rate": 4.303898944665943e-06, "loss": 0.6425, "step": 9050 }, { "epoch": 0.637170010559662, "grad_norm": 1.8643172979354858, "learning_rate": 4.302426046833978e-06, "loss": 0.671, "step": 9051 }, { "epoch": 0.6372404083069342, "grad_norm": 1.7419620752334595, "learning_rate": 4.300953289251326e-06, "loss": 0.6863, "step": 9052 }, { "epoch": 0.6373108060542063, "grad_norm": 1.695651650428772, "learning_rate": 4.299480671994556e-06, "loss": 0.6242, "step": 9053 }, { "epoch": 0.6373812038014783, "grad_norm": 1.6151113510131836, "learning_rate": 4.298008195140234e-06, "loss": 0.685, "step": 9054 }, { "epoch": 0.6374516015487505, "grad_norm": 1.8108160495758057, "learning_rate": 4.2965358587649086e-06, "loss": 0.6591, "step": 9055 }, { "epoch": 0.6375219992960225, "grad_norm": 1.8456366062164307, "learning_rate": 4.295063662945136e-06, "loss": 0.5577, "step": 9056 }, { "epoch": 0.6375923970432946, "grad_norm": 2.0199553966522217, "learning_rate": 4.293591607757451e-06, "loss": 0.7059, "step": 9057 }, { "epoch": 0.6376627947905668, "grad_norm": 1.810887336730957, "learning_rate": 4.292119693278393e-06, "loss": 0.7082, "step": 9058 }, { "epoch": 0.6377331925378388, "grad_norm": 1.7042158842086792, "learning_rate": 4.29064791958448e-06, "loss": 0.633, "step": 9059 }, { "epoch": 0.6378035902851109, "grad_norm": 1.919373869895935, "learning_rate": 4.289176286752239e-06, "loss": 0.6796, "step": 9060 }, { "epoch": 0.6378739880323829, "grad_norm": 2.006330728530884, "learning_rate": 4.287704794858175e-06, "loss": 0.6563, "step": 9061 }, { "epoch": 0.6379443857796551, "grad_norm": 1.450805425643921, "learning_rate": 4.2862334439787966e-06, "loss": 0.59, "step": 9062 }, { "epoch": 0.6380147835269271, "grad_norm": 1.844028353691101, "learning_rate": 4.2847622341906e-06, "loss": 0.7113, "step": 9063 }, { "epoch": 0.6380851812741992, "grad_norm": 1.9812822341918945, "learning_rate": 4.283291165570073e-06, "loss": 0.7193, "step": 9064 }, { "epoch": 0.6381555790214714, "grad_norm": 1.8797199726104736, "learning_rate": 4.281820238193697e-06, "loss": 0.742, "step": 9065 }, { "epoch": 0.6382259767687434, "grad_norm": 2.3496038913726807, "learning_rate": 4.2803494521379534e-06, "loss": 0.6367, "step": 9066 }, { "epoch": 0.6382963745160155, "grad_norm": 2.107584238052368, "learning_rate": 4.278878807479298e-06, "loss": 0.6751, "step": 9067 }, { "epoch": 0.6383667722632875, "grad_norm": 1.7119874954223633, "learning_rate": 4.277408304294204e-06, "loss": 0.592, "step": 9068 }, { "epoch": 0.6384371700105597, "grad_norm": 1.8018893003463745, "learning_rate": 4.275937942659114e-06, "loss": 0.6192, "step": 9069 }, { "epoch": 0.6385075677578318, "grad_norm": 1.8757846355438232, "learning_rate": 4.2744677226504806e-06, "loss": 0.7205, "step": 9070 }, { "epoch": 0.6385779655051038, "grad_norm": 1.9046283960342407, "learning_rate": 4.2729976443447356e-06, "loss": 0.7431, "step": 9071 }, { "epoch": 0.638648363252376, "grad_norm": 1.9265161752700806, "learning_rate": 4.2715277078183155e-06, "loss": 0.7417, "step": 9072 }, { "epoch": 0.638718760999648, "grad_norm": 1.9071297645568848, "learning_rate": 4.270057913147639e-06, "loss": 0.693, "step": 9073 }, { "epoch": 0.6387891587469201, "grad_norm": 1.855251669883728, "learning_rate": 4.268588260409125e-06, "loss": 0.7503, "step": 9074 }, { "epoch": 0.6388595564941922, "grad_norm": 1.939955711364746, "learning_rate": 4.267118749679179e-06, "loss": 0.679, "step": 9075 }, { "epoch": 0.6389299542414643, "grad_norm": 1.8332232236862183, "learning_rate": 4.265649381034205e-06, "loss": 0.5557, "step": 9076 }, { "epoch": 0.6390003519887364, "grad_norm": 1.683099389076233, "learning_rate": 4.264180154550597e-06, "loss": 0.8239, "step": 9077 }, { "epoch": 0.6390707497360084, "grad_norm": 1.7822729349136353, "learning_rate": 4.262711070304741e-06, "loss": 0.7635, "step": 9078 }, { "epoch": 0.6391411474832805, "grad_norm": 1.7841684818267822, "learning_rate": 4.261242128373015e-06, "loss": 0.7259, "step": 9079 }, { "epoch": 0.6392115452305526, "grad_norm": 1.7090479135513306, "learning_rate": 4.259773328831791e-06, "loss": 0.6748, "step": 9080 }, { "epoch": 0.6392819429778247, "grad_norm": 1.8508425951004028, "learning_rate": 4.258304671757434e-06, "loss": 0.7068, "step": 9081 }, { "epoch": 0.6393523407250968, "grad_norm": 1.8740971088409424, "learning_rate": 4.256836157226298e-06, "loss": 0.6273, "step": 9082 }, { "epoch": 0.6394227384723689, "grad_norm": 1.8451915979385376, "learning_rate": 4.2553677853147366e-06, "loss": 0.6393, "step": 9083 }, { "epoch": 0.639493136219641, "grad_norm": 1.6348166465759277, "learning_rate": 4.253899556099089e-06, "loss": 0.6983, "step": 9084 }, { "epoch": 0.639563533966913, "grad_norm": 1.8125122785568237, "learning_rate": 4.2524314696556914e-06, "loss": 0.7732, "step": 9085 }, { "epoch": 0.6396339317141851, "grad_norm": 1.6707355976104736, "learning_rate": 4.250963526060869e-06, "loss": 0.5984, "step": 9086 }, { "epoch": 0.6397043294614573, "grad_norm": 1.770776629447937, "learning_rate": 4.249495725390944e-06, "loss": 0.7111, "step": 9087 }, { "epoch": 0.6397747272087293, "grad_norm": 1.5083149671554565, "learning_rate": 4.248028067722224e-06, "loss": 0.6419, "step": 9088 }, { "epoch": 0.6398451249560014, "grad_norm": 1.5843185186386108, "learning_rate": 4.24656055313102e-06, "loss": 0.6426, "step": 9089 }, { "epoch": 0.6399155227032735, "grad_norm": 1.7143665552139282, "learning_rate": 4.245093181693624e-06, "loss": 0.6626, "step": 9090 }, { "epoch": 0.6399859204505456, "grad_norm": 1.5836093425750732, "learning_rate": 4.243625953486332e-06, "loss": 0.6373, "step": 9091 }, { "epoch": 0.6400563181978177, "grad_norm": 1.6392686367034912, "learning_rate": 4.242158868585421e-06, "loss": 0.6202, "step": 9092 }, { "epoch": 0.6401267159450897, "grad_norm": 2.064199209213257, "learning_rate": 4.240691927067169e-06, "loss": 0.738, "step": 9093 }, { "epoch": 0.6401971136923619, "grad_norm": 1.5762439966201782, "learning_rate": 4.239225129007839e-06, "loss": 0.5804, "step": 9094 }, { "epoch": 0.6402675114396339, "grad_norm": 1.9568367004394531, "learning_rate": 4.237758474483699e-06, "loss": 0.6413, "step": 9095 }, { "epoch": 0.640337909186906, "grad_norm": 1.751034140586853, "learning_rate": 4.236291963570996e-06, "loss": 0.6144, "step": 9096 }, { "epoch": 0.640408306934178, "grad_norm": 2.6431710720062256, "learning_rate": 4.234825596345978e-06, "loss": 0.6392, "step": 9097 }, { "epoch": 0.6404787046814502, "grad_norm": 2.0099048614501953, "learning_rate": 4.2333593728848805e-06, "loss": 0.8248, "step": 9098 }, { "epoch": 0.6405491024287223, "grad_norm": 1.659902572631836, "learning_rate": 4.2318932932639365e-06, "loss": 0.6198, "step": 9099 }, { "epoch": 0.6406195001759943, "grad_norm": 1.6578071117401123, "learning_rate": 4.230427357559363e-06, "loss": 0.7174, "step": 9100 }, { "epoch": 0.6406898979232665, "grad_norm": 1.7286264896392822, "learning_rate": 4.228961565847383e-06, "loss": 0.7039, "step": 9101 }, { "epoch": 0.6407602956705385, "grad_norm": 1.6753085851669312, "learning_rate": 4.227495918204199e-06, "loss": 0.7452, "step": 9102 }, { "epoch": 0.6408306934178106, "grad_norm": 1.5391632318496704, "learning_rate": 4.226030414706016e-06, "loss": 0.7265, "step": 9103 }, { "epoch": 0.6409010911650828, "grad_norm": 1.897689700126648, "learning_rate": 4.224565055429021e-06, "loss": 0.6423, "step": 9104 }, { "epoch": 0.6409714889123548, "grad_norm": 1.9320497512817383, "learning_rate": 4.223099840449404e-06, "loss": 0.7265, "step": 9105 }, { "epoch": 0.6410418866596269, "grad_norm": 2.0784788131713867, "learning_rate": 4.221634769843338e-06, "loss": 0.7472, "step": 9106 }, { "epoch": 0.6411122844068989, "grad_norm": 1.5893454551696777, "learning_rate": 4.220169843687e-06, "loss": 0.6979, "step": 9107 }, { "epoch": 0.6411826821541711, "grad_norm": 2.388697862625122, "learning_rate": 4.218705062056549e-06, "loss": 0.7298, "step": 9108 }, { "epoch": 0.6412530799014432, "grad_norm": 1.9460208415985107, "learning_rate": 4.217240425028141e-06, "loss": 0.6436, "step": 9109 }, { "epoch": 0.6413234776487152, "grad_norm": 1.4626660346984863, "learning_rate": 4.215775932677922e-06, "loss": 0.6, "step": 9110 }, { "epoch": 0.6413938753959874, "grad_norm": 1.7926851511001587, "learning_rate": 4.214311585082035e-06, "loss": 0.6811, "step": 9111 }, { "epoch": 0.6414642731432594, "grad_norm": 1.688137412071228, "learning_rate": 4.21284738231661e-06, "loss": 0.6708, "step": 9112 }, { "epoch": 0.6415346708905315, "grad_norm": 1.9744746685028076, "learning_rate": 4.211383324457775e-06, "loss": 0.6845, "step": 9113 }, { "epoch": 0.6416050686378036, "grad_norm": 1.8298836946487427, "learning_rate": 4.209919411581645e-06, "loss": 0.5412, "step": 9114 }, { "epoch": 0.6416754663850757, "grad_norm": 1.738178014755249, "learning_rate": 4.208455643764334e-06, "loss": 0.6889, "step": 9115 }, { "epoch": 0.6417458641323478, "grad_norm": 2.198148488998413, "learning_rate": 4.20699202108194e-06, "loss": 0.7161, "step": 9116 }, { "epoch": 0.6418162618796198, "grad_norm": 3.5753347873687744, "learning_rate": 4.205528543610563e-06, "loss": 0.6408, "step": 9117 }, { "epoch": 0.641886659626892, "grad_norm": 1.8651832342147827, "learning_rate": 4.204065211426285e-06, "loss": 0.7958, "step": 9118 }, { "epoch": 0.641957057374164, "grad_norm": 1.9919747114181519, "learning_rate": 4.2026020246051914e-06, "loss": 0.6114, "step": 9119 }, { "epoch": 0.6420274551214361, "grad_norm": 1.89947509765625, "learning_rate": 4.20113898322335e-06, "loss": 0.6794, "step": 9120 }, { "epoch": 0.6420978528687082, "grad_norm": 2.2212162017822266, "learning_rate": 4.199676087356829e-06, "loss": 0.6821, "step": 9121 }, { "epoch": 0.6421682506159803, "grad_norm": 1.7974796295166016, "learning_rate": 4.198213337081682e-06, "loss": 0.6229, "step": 9122 }, { "epoch": 0.6422386483632524, "grad_norm": 1.9683970212936401, "learning_rate": 4.1967507324739655e-06, "loss": 0.7107, "step": 9123 }, { "epoch": 0.6423090461105244, "grad_norm": 1.7824574708938599, "learning_rate": 4.1952882736097125e-06, "loss": 0.6347, "step": 9124 }, { "epoch": 0.6423794438577966, "grad_norm": 1.7771830558776855, "learning_rate": 4.193825960564965e-06, "loss": 0.7355, "step": 9125 }, { "epoch": 0.6424498416050687, "grad_norm": 2.035944700241089, "learning_rate": 4.192363793415746e-06, "loss": 0.7087, "step": 9126 }, { "epoch": 0.6425202393523407, "grad_norm": 1.7529469728469849, "learning_rate": 4.190901772238076e-06, "loss": 0.6342, "step": 9127 }, { "epoch": 0.6425906370996128, "grad_norm": 1.695633888244629, "learning_rate": 4.189439897107967e-06, "loss": 0.603, "step": 9128 }, { "epoch": 0.6426610348468849, "grad_norm": 2.252720594406128, "learning_rate": 4.187978168101424e-06, "loss": 0.5411, "step": 9129 }, { "epoch": 0.642731432594157, "grad_norm": 2.5463125705718994, "learning_rate": 4.186516585294443e-06, "loss": 0.7147, "step": 9130 }, { "epoch": 0.6428018303414291, "grad_norm": 1.7267404794692993, "learning_rate": 4.185055148763012e-06, "loss": 0.7914, "step": 9131 }, { "epoch": 0.6428722280887011, "grad_norm": 1.559985876083374, "learning_rate": 4.183593858583114e-06, "loss": 0.6591, "step": 9132 }, { "epoch": 0.6429426258359733, "grad_norm": 2.1675045490264893, "learning_rate": 4.18213271483072e-06, "loss": 0.6918, "step": 9133 }, { "epoch": 0.6430130235832453, "grad_norm": 1.757025122642517, "learning_rate": 4.1806717175818e-06, "loss": 0.5634, "step": 9134 }, { "epoch": 0.6430834213305174, "grad_norm": 1.9944978952407837, "learning_rate": 4.1792108669123095e-06, "loss": 0.6624, "step": 9135 }, { "epoch": 0.6431538190777895, "grad_norm": 1.7281886339187622, "learning_rate": 4.1777501628982025e-06, "loss": 0.7108, "step": 9136 }, { "epoch": 0.6432242168250616, "grad_norm": 1.9555169343948364, "learning_rate": 4.176289605615419e-06, "loss": 0.6065, "step": 9137 }, { "epoch": 0.6432946145723337, "grad_norm": 1.7418522834777832, "learning_rate": 4.174829195139898e-06, "loss": 0.724, "step": 9138 }, { "epoch": 0.6433650123196057, "grad_norm": 2.6539337635040283, "learning_rate": 4.173368931547562e-06, "loss": 0.6258, "step": 9139 }, { "epoch": 0.6434354100668779, "grad_norm": 1.75454580783844, "learning_rate": 4.171908814914339e-06, "loss": 0.6705, "step": 9140 }, { "epoch": 0.6435058078141499, "grad_norm": 1.5221160650253296, "learning_rate": 4.1704488453161355e-06, "loss": 0.5645, "step": 9141 }, { "epoch": 0.643576205561422, "grad_norm": 1.475270390510559, "learning_rate": 4.168989022828861e-06, "loss": 0.6237, "step": 9142 }, { "epoch": 0.6436466033086942, "grad_norm": 1.5776747465133667, "learning_rate": 4.167529347528411e-06, "loss": 0.5912, "step": 9143 }, { "epoch": 0.6437170010559662, "grad_norm": 1.5148683786392212, "learning_rate": 4.166069819490675e-06, "loss": 0.7868, "step": 9144 }, { "epoch": 0.6437873988032383, "grad_norm": 2.7540969848632812, "learning_rate": 4.164610438791534e-06, "loss": 0.6529, "step": 9145 }, { "epoch": 0.6438577965505103, "grad_norm": 1.6126291751861572, "learning_rate": 4.163151205506866e-06, "loss": 0.6802, "step": 9146 }, { "epoch": 0.6439281942977825, "grad_norm": 1.7265514135360718, "learning_rate": 4.161692119712536e-06, "loss": 0.7411, "step": 9147 }, { "epoch": 0.6439985920450546, "grad_norm": 1.862210988998413, "learning_rate": 4.160233181484403e-06, "loss": 0.6742, "step": 9148 }, { "epoch": 0.6440689897923266, "grad_norm": 1.6713520288467407, "learning_rate": 4.1587743908983195e-06, "loss": 0.6545, "step": 9149 }, { "epoch": 0.6441393875395988, "grad_norm": 1.5899771451950073, "learning_rate": 4.157315748030128e-06, "loss": 0.6703, "step": 9150 }, { "epoch": 0.6442097852868708, "grad_norm": 1.4651386737823486, "learning_rate": 4.155857252955663e-06, "loss": 0.5971, "step": 9151 }, { "epoch": 0.6442801830341429, "grad_norm": 2.329368829727173, "learning_rate": 4.1543989057507585e-06, "loss": 0.6262, "step": 9152 }, { "epoch": 0.644350580781415, "grad_norm": 3.0783164501190186, "learning_rate": 4.15294070649123e-06, "loss": 0.6187, "step": 9153 }, { "epoch": 0.6444209785286871, "grad_norm": 1.8251911401748657, "learning_rate": 4.151482655252894e-06, "loss": 0.6209, "step": 9154 }, { "epoch": 0.6444913762759592, "grad_norm": 1.7466423511505127, "learning_rate": 4.1500247521115535e-06, "loss": 0.6142, "step": 9155 }, { "epoch": 0.6445617740232312, "grad_norm": 2.0108773708343506, "learning_rate": 4.148566997143009e-06, "loss": 0.6751, "step": 9156 }, { "epoch": 0.6446321717705034, "grad_norm": 1.8131394386291504, "learning_rate": 4.147109390423045e-06, "loss": 0.6033, "step": 9157 }, { "epoch": 0.6447025695177754, "grad_norm": 2.285221576690674, "learning_rate": 4.145651932027451e-06, "loss": 0.6766, "step": 9158 }, { "epoch": 0.6447729672650475, "grad_norm": 2.095156192779541, "learning_rate": 4.144194622031996e-06, "loss": 0.76, "step": 9159 }, { "epoch": 0.6448433650123196, "grad_norm": 1.7318828105926514, "learning_rate": 4.142737460512451e-06, "loss": 0.6493, "step": 9160 }, { "epoch": 0.6449137627595917, "grad_norm": 1.683665156364441, "learning_rate": 4.141280447544571e-06, "loss": 0.6322, "step": 9161 }, { "epoch": 0.6449841605068638, "grad_norm": 1.8053125143051147, "learning_rate": 4.139823583204112e-06, "loss": 0.7009, "step": 9162 }, { "epoch": 0.6450545582541358, "grad_norm": 1.9910920858383179, "learning_rate": 4.138366867566813e-06, "loss": 0.5715, "step": 9163 }, { "epoch": 0.645124956001408, "grad_norm": 1.7541007995605469, "learning_rate": 4.136910300708413e-06, "loss": 0.5983, "step": 9164 }, { "epoch": 0.6451953537486801, "grad_norm": 1.6353970766067505, "learning_rate": 4.135453882704639e-06, "loss": 0.565, "step": 9165 }, { "epoch": 0.6452657514959521, "grad_norm": 1.7474687099456787, "learning_rate": 4.1339976136312135e-06, "loss": 0.6484, "step": 9166 }, { "epoch": 0.6453361492432242, "grad_norm": 1.6808247566223145, "learning_rate": 4.132541493563845e-06, "loss": 0.6641, "step": 9167 }, { "epoch": 0.6454065469904963, "grad_norm": 1.475577712059021, "learning_rate": 4.131085522578245e-06, "loss": 0.6926, "step": 9168 }, { "epoch": 0.6454769447377684, "grad_norm": 1.7524925470352173, "learning_rate": 4.129629700750102e-06, "loss": 0.5511, "step": 9169 }, { "epoch": 0.6455473424850405, "grad_norm": 2.022364377975464, "learning_rate": 4.1281740281551145e-06, "loss": 0.712, "step": 9170 }, { "epoch": 0.6456177402323126, "grad_norm": 1.6499886512756348, "learning_rate": 4.1267185048689576e-06, "loss": 0.5934, "step": 9171 }, { "epoch": 0.6456881379795847, "grad_norm": 1.826562762260437, "learning_rate": 4.12526313096731e-06, "loss": 0.648, "step": 9172 }, { "epoch": 0.6457585357268567, "grad_norm": 1.4519411325454712, "learning_rate": 4.123807906525833e-06, "loss": 0.5799, "step": 9173 }, { "epoch": 0.6458289334741288, "grad_norm": 2.186215877532959, "learning_rate": 4.122352831620191e-06, "loss": 0.7625, "step": 9174 }, { "epoch": 0.6458993312214009, "grad_norm": 1.9628605842590332, "learning_rate": 4.120897906326029e-06, "loss": 0.633, "step": 9175 }, { "epoch": 0.645969728968673, "grad_norm": 2.0311801433563232, "learning_rate": 4.119443130718994e-06, "loss": 0.6415, "step": 9176 }, { "epoch": 0.6460401267159451, "grad_norm": 2.251056671142578, "learning_rate": 4.117988504874718e-06, "loss": 0.7347, "step": 9177 }, { "epoch": 0.6461105244632172, "grad_norm": 1.6213743686676025, "learning_rate": 4.1165340288688285e-06, "loss": 0.6578, "step": 9178 }, { "epoch": 0.6461809222104893, "grad_norm": 1.863011121749878, "learning_rate": 4.115079702776949e-06, "loss": 0.6486, "step": 9179 }, { "epoch": 0.6462513199577613, "grad_norm": 1.9735623598098755, "learning_rate": 4.113625526674687e-06, "loss": 0.6392, "step": 9180 }, { "epoch": 0.6463217177050334, "grad_norm": 1.7519330978393555, "learning_rate": 4.1121715006376495e-06, "loss": 0.7062, "step": 9181 }, { "epoch": 0.6463921154523056, "grad_norm": 1.7023142576217651, "learning_rate": 4.11071762474143e-06, "loss": 0.7949, "step": 9182 }, { "epoch": 0.6464625131995776, "grad_norm": 1.6941893100738525, "learning_rate": 4.1092638990616185e-06, "loss": 0.6474, "step": 9183 }, { "epoch": 0.6465329109468497, "grad_norm": 2.0015220642089844, "learning_rate": 4.107810323673793e-06, "loss": 0.7546, "step": 9184 }, { "epoch": 0.6466033086941217, "grad_norm": 2.2114908695220947, "learning_rate": 4.10635689865353e-06, "loss": 0.7334, "step": 9185 }, { "epoch": 0.6466737064413939, "grad_norm": 1.6931229829788208, "learning_rate": 4.104903624076391e-06, "loss": 0.616, "step": 9186 }, { "epoch": 0.646744104188666, "grad_norm": 1.809005856513977, "learning_rate": 4.103450500017937e-06, "loss": 0.7084, "step": 9187 }, { "epoch": 0.646814501935938, "grad_norm": 1.4537408351898193, "learning_rate": 4.101997526553713e-06, "loss": 0.5779, "step": 9188 }, { "epoch": 0.6468848996832102, "grad_norm": 1.670598030090332, "learning_rate": 4.100544703759263e-06, "loss": 0.6329, "step": 9189 }, { "epoch": 0.6469552974304822, "grad_norm": 1.5561916828155518, "learning_rate": 4.0990920317101175e-06, "loss": 0.5359, "step": 9190 }, { "epoch": 0.6470256951777543, "grad_norm": 1.9127743244171143, "learning_rate": 4.097639510481807e-06, "loss": 0.6564, "step": 9191 }, { "epoch": 0.6470960929250263, "grad_norm": 1.5878373384475708, "learning_rate": 4.096187140149845e-06, "loss": 0.6945, "step": 9192 }, { "epoch": 0.6471664906722985, "grad_norm": 1.8311591148376465, "learning_rate": 4.094734920789745e-06, "loss": 0.6255, "step": 9193 }, { "epoch": 0.6472368884195706, "grad_norm": 1.7654848098754883, "learning_rate": 4.093282852477007e-06, "loss": 0.5705, "step": 9194 }, { "epoch": 0.6473072861668426, "grad_norm": 2.02492618560791, "learning_rate": 4.091830935287127e-06, "loss": 0.7143, "step": 9195 }, { "epoch": 0.6473776839141148, "grad_norm": 2.170499801635742, "learning_rate": 4.090379169295588e-06, "loss": 0.6385, "step": 9196 }, { "epoch": 0.6474480816613868, "grad_norm": 1.7445669174194336, "learning_rate": 4.0889275545778736e-06, "loss": 0.5525, "step": 9197 }, { "epoch": 0.6475184794086589, "grad_norm": 1.6344386339187622, "learning_rate": 4.087476091209451e-06, "loss": 0.7382, "step": 9198 }, { "epoch": 0.647588877155931, "grad_norm": 1.803437352180481, "learning_rate": 4.086024779265785e-06, "loss": 0.6828, "step": 9199 }, { "epoch": 0.6476592749032031, "grad_norm": 1.8265740871429443, "learning_rate": 4.084573618822327e-06, "loss": 0.724, "step": 9200 }, { "epoch": 0.6477296726504752, "grad_norm": 2.0018696784973145, "learning_rate": 4.083122609954531e-06, "loss": 0.6309, "step": 9201 }, { "epoch": 0.6478000703977472, "grad_norm": 1.6273831129074097, "learning_rate": 4.08167175273783e-06, "loss": 0.6032, "step": 9202 }, { "epoch": 0.6478704681450194, "grad_norm": 1.7669438123703003, "learning_rate": 4.080221047247659e-06, "loss": 0.7656, "step": 9203 }, { "epoch": 0.6479408658922915, "grad_norm": 1.8705053329467773, "learning_rate": 4.0787704935594376e-06, "loss": 0.7096, "step": 9204 }, { "epoch": 0.6480112636395635, "grad_norm": 1.9329562187194824, "learning_rate": 4.077320091748585e-06, "loss": 0.6925, "step": 9205 }, { "epoch": 0.6480816613868357, "grad_norm": 1.8714104890823364, "learning_rate": 4.0758698418905065e-06, "loss": 0.5881, "step": 9206 }, { "epoch": 0.6481520591341077, "grad_norm": 1.8278003931045532, "learning_rate": 4.074419744060604e-06, "loss": 0.7287, "step": 9207 }, { "epoch": 0.6482224568813798, "grad_norm": 2.1428380012512207, "learning_rate": 4.072969798334271e-06, "loss": 0.6466, "step": 9208 }, { "epoch": 0.6482928546286519, "grad_norm": 1.7402604818344116, "learning_rate": 4.071520004786888e-06, "loss": 0.7588, "step": 9209 }, { "epoch": 0.648363252375924, "grad_norm": 1.705783724784851, "learning_rate": 4.07007036349383e-06, "loss": 0.7302, "step": 9210 }, { "epoch": 0.6484336501231961, "grad_norm": 1.7048941850662231, "learning_rate": 4.068620874530468e-06, "loss": 0.607, "step": 9211 }, { "epoch": 0.6485040478704681, "grad_norm": 2.0970375537872314, "learning_rate": 4.06717153797216e-06, "loss": 0.6625, "step": 9212 }, { "epoch": 0.6485744456177402, "grad_norm": 1.9738019704818726, "learning_rate": 4.065722353894264e-06, "loss": 0.5544, "step": 9213 }, { "epoch": 0.6486448433650123, "grad_norm": 2.048354387283325, "learning_rate": 4.064273322372119e-06, "loss": 0.6928, "step": 9214 }, { "epoch": 0.6487152411122844, "grad_norm": 1.5039058923721313, "learning_rate": 4.062824443481063e-06, "loss": 0.6418, "step": 9215 }, { "epoch": 0.6487856388595565, "grad_norm": 1.8252884149551392, "learning_rate": 4.061375717296421e-06, "loss": 0.716, "step": 9216 }, { "epoch": 0.6488560366068286, "grad_norm": 1.739681363105774, "learning_rate": 4.05992714389352e-06, "loss": 0.6148, "step": 9217 }, { "epoch": 0.6489264343541007, "grad_norm": 1.884558081626892, "learning_rate": 4.0584787233476666e-06, "loss": 0.561, "step": 9218 }, { "epoch": 0.6489968321013727, "grad_norm": 3.210170030593872, "learning_rate": 4.057030455734172e-06, "loss": 0.7332, "step": 9219 }, { "epoch": 0.6490672298486448, "grad_norm": 1.7378777265548706, "learning_rate": 4.055582341128328e-06, "loss": 0.6976, "step": 9220 }, { "epoch": 0.649137627595917, "grad_norm": 1.7024846076965332, "learning_rate": 4.0541343796054254e-06, "loss": 0.7878, "step": 9221 }, { "epoch": 0.649208025343189, "grad_norm": 1.9325062036514282, "learning_rate": 4.052686571240741e-06, "loss": 0.7457, "step": 9222 }, { "epoch": 0.6492784230904611, "grad_norm": 1.5473859310150146, "learning_rate": 4.051238916109554e-06, "loss": 0.6305, "step": 9223 }, { "epoch": 0.6493488208377332, "grad_norm": 1.710837483406067, "learning_rate": 4.049791414287124e-06, "loss": 0.7099, "step": 9224 }, { "epoch": 0.6494192185850053, "grad_norm": 1.9322608709335327, "learning_rate": 4.0483440658487125e-06, "loss": 0.5749, "step": 9225 }, { "epoch": 0.6494896163322774, "grad_norm": 1.7355010509490967, "learning_rate": 4.046896870869567e-06, "loss": 0.6302, "step": 9226 }, { "epoch": 0.6495600140795494, "grad_norm": 1.909568428993225, "learning_rate": 4.045449829424924e-06, "loss": 0.6986, "step": 9227 }, { "epoch": 0.6496304118268216, "grad_norm": 2.2857961654663086, "learning_rate": 4.044002941590023e-06, "loss": 0.561, "step": 9228 }, { "epoch": 0.6497008095740936, "grad_norm": 1.5829561948776245, "learning_rate": 4.042556207440084e-06, "loss": 0.6993, "step": 9229 }, { "epoch": 0.6497712073213657, "grad_norm": 1.6681197881698608, "learning_rate": 4.041109627050329e-06, "loss": 0.5433, "step": 9230 }, { "epoch": 0.6498416050686378, "grad_norm": 1.7921912670135498, "learning_rate": 4.039663200495961e-06, "loss": 0.6214, "step": 9231 }, { "epoch": 0.6499120028159099, "grad_norm": 1.8705651760101318, "learning_rate": 4.0382169278521895e-06, "loss": 0.5266, "step": 9232 }, { "epoch": 0.649982400563182, "grad_norm": 1.5860049724578857, "learning_rate": 4.036770809194197e-06, "loss": 0.7168, "step": 9233 }, { "epoch": 0.650052798310454, "grad_norm": 1.86215341091156, "learning_rate": 4.035324844597177e-06, "loss": 0.5586, "step": 9234 }, { "epoch": 0.6501231960577262, "grad_norm": 1.8198368549346924, "learning_rate": 4.0338790341363e-06, "loss": 0.6595, "step": 9235 }, { "epoch": 0.6501935938049982, "grad_norm": 1.8672734498977661, "learning_rate": 4.032433377886741e-06, "loss": 0.6471, "step": 9236 }, { "epoch": 0.6502639915522703, "grad_norm": 1.7846415042877197, "learning_rate": 4.030987875923655e-06, "loss": 0.6181, "step": 9237 }, { "epoch": 0.6503343892995425, "grad_norm": 1.664820671081543, "learning_rate": 4.029542528322205e-06, "loss": 0.594, "step": 9238 }, { "epoch": 0.6504047870468145, "grad_norm": 1.943035364151001, "learning_rate": 4.028097335157523e-06, "loss": 0.7272, "step": 9239 }, { "epoch": 0.6504751847940866, "grad_norm": 2.471278190612793, "learning_rate": 4.026652296504755e-06, "loss": 0.5899, "step": 9240 }, { "epoch": 0.6505455825413586, "grad_norm": 1.8564873933792114, "learning_rate": 4.025207412439024e-06, "loss": 0.6896, "step": 9241 }, { "epoch": 0.6506159802886308, "grad_norm": 1.808910608291626, "learning_rate": 4.0237626830354566e-06, "loss": 0.6178, "step": 9242 }, { "epoch": 0.6506863780359029, "grad_norm": 1.6726816892623901, "learning_rate": 4.02231810836916e-06, "loss": 0.674, "step": 9243 }, { "epoch": 0.6507567757831749, "grad_norm": 1.9593249559402466, "learning_rate": 4.020873688515247e-06, "loss": 0.6339, "step": 9244 }, { "epoch": 0.6508271735304471, "grad_norm": 1.8224382400512695, "learning_rate": 4.019429423548803e-06, "loss": 0.7181, "step": 9245 }, { "epoch": 0.6508975712777191, "grad_norm": 1.8617184162139893, "learning_rate": 4.0179853135449275e-06, "loss": 0.6607, "step": 9246 }, { "epoch": 0.6509679690249912, "grad_norm": 1.2727385759353638, "learning_rate": 4.016541358578692e-06, "loss": 0.7472, "step": 9247 }, { "epoch": 0.6510383667722632, "grad_norm": 2.0536115169525146, "learning_rate": 4.015097558725176e-06, "loss": 0.6132, "step": 9248 }, { "epoch": 0.6511087645195354, "grad_norm": 1.9759191274642944, "learning_rate": 4.013653914059438e-06, "loss": 0.6799, "step": 9249 }, { "epoch": 0.6511791622668075, "grad_norm": 1.9957935810089111, "learning_rate": 4.0122104246565446e-06, "loss": 0.6422, "step": 9250 }, { "epoch": 0.6512495600140795, "grad_norm": 1.6152325868606567, "learning_rate": 4.01076709059153e-06, "loss": 0.6617, "step": 9251 }, { "epoch": 0.6513199577613517, "grad_norm": 1.6450284719467163, "learning_rate": 4.009323911939444e-06, "loss": 0.6254, "step": 9252 }, { "epoch": 0.6513903555086237, "grad_norm": 1.97744882106781, "learning_rate": 4.007880888775316e-06, "loss": 0.5949, "step": 9253 }, { "epoch": 0.6514607532558958, "grad_norm": 2.0414931774139404, "learning_rate": 4.006438021174171e-06, "loss": 0.7387, "step": 9254 }, { "epoch": 0.6515311510031679, "grad_norm": 1.8643038272857666, "learning_rate": 4.004995309211023e-06, "loss": 0.7092, "step": 9255 }, { "epoch": 0.65160154875044, "grad_norm": 1.800934910774231, "learning_rate": 4.003552752960886e-06, "loss": 0.6488, "step": 9256 }, { "epoch": 0.6516719464977121, "grad_norm": 2.065474271774292, "learning_rate": 4.0021103524987496e-06, "loss": 0.6943, "step": 9257 }, { "epoch": 0.6517423442449841, "grad_norm": 1.7736327648162842, "learning_rate": 4.0006681078996135e-06, "loss": 0.7201, "step": 9258 }, { "epoch": 0.6518127419922563, "grad_norm": 2.072310209274292, "learning_rate": 3.999226019238457e-06, "loss": 0.7205, "step": 9259 }, { "epoch": 0.6518831397395284, "grad_norm": 2.1861965656280518, "learning_rate": 3.997784086590259e-06, "loss": 0.6334, "step": 9260 }, { "epoch": 0.6519535374868004, "grad_norm": 1.6824575662612915, "learning_rate": 3.996342310029984e-06, "loss": 0.713, "step": 9261 }, { "epoch": 0.6520239352340725, "grad_norm": 1.9644349813461304, "learning_rate": 3.994900689632595e-06, "loss": 0.6616, "step": 9262 }, { "epoch": 0.6520943329813446, "grad_norm": 1.7756037712097168, "learning_rate": 3.99345922547304e-06, "loss": 0.705, "step": 9263 }, { "epoch": 0.6521647307286167, "grad_norm": 1.5650938749313354, "learning_rate": 3.992017917626263e-06, "loss": 0.5902, "step": 9264 }, { "epoch": 0.6522351284758888, "grad_norm": 2.184943914413452, "learning_rate": 3.990576766167195e-06, "loss": 0.73, "step": 9265 }, { "epoch": 0.6523055262231608, "grad_norm": 1.9270200729370117, "learning_rate": 3.98913577117077e-06, "loss": 0.6795, "step": 9266 }, { "epoch": 0.652375923970433, "grad_norm": 2.0938117504119873, "learning_rate": 3.987694932711901e-06, "loss": 0.7106, "step": 9267 }, { "epoch": 0.652446321717705, "grad_norm": 1.5736783742904663, "learning_rate": 3.9862542508655035e-06, "loss": 0.6036, "step": 9268 }, { "epoch": 0.6525167194649771, "grad_norm": 1.5542707443237305, "learning_rate": 3.9848137257064755e-06, "loss": 0.593, "step": 9269 }, { "epoch": 0.6525871172122492, "grad_norm": 2.0307464599609375, "learning_rate": 3.983373357309713e-06, "loss": 0.6262, "step": 9270 }, { "epoch": 0.6526575149595213, "grad_norm": 1.7807729244232178, "learning_rate": 3.9819331457500996e-06, "loss": 0.6044, "step": 9271 }, { "epoch": 0.6527279127067934, "grad_norm": 1.7125300168991089, "learning_rate": 3.980493091102517e-06, "loss": 0.6674, "step": 9272 }, { "epoch": 0.6527983104540654, "grad_norm": 1.880088210105896, "learning_rate": 3.9790531934418315e-06, "loss": 0.6954, "step": 9273 }, { "epoch": 0.6528687082013376, "grad_norm": 2.079099416732788, "learning_rate": 3.977613452842909e-06, "loss": 0.626, "step": 9274 }, { "epoch": 0.6529391059486096, "grad_norm": 1.6816585063934326, "learning_rate": 3.9761738693806e-06, "loss": 0.5569, "step": 9275 }, { "epoch": 0.6530095036958817, "grad_norm": 1.8992546796798706, "learning_rate": 3.97473444312975e-06, "loss": 0.6487, "step": 9276 }, { "epoch": 0.6530799014431539, "grad_norm": 2.0140066146850586, "learning_rate": 3.973295174165194e-06, "loss": 0.5378, "step": 9277 }, { "epoch": 0.6531502991904259, "grad_norm": 2.5940306186676025, "learning_rate": 3.971856062561762e-06, "loss": 0.7788, "step": 9278 }, { "epoch": 0.653220696937698, "grad_norm": 1.9250954389572144, "learning_rate": 3.970417108394279e-06, "loss": 0.6997, "step": 9279 }, { "epoch": 0.65329109468497, "grad_norm": 1.898069143295288, "learning_rate": 3.968978311737554e-06, "loss": 0.64, "step": 9280 }, { "epoch": 0.6533614924322422, "grad_norm": 1.7730474472045898, "learning_rate": 3.967539672666393e-06, "loss": 0.7441, "step": 9281 }, { "epoch": 0.6534318901795143, "grad_norm": 1.6446844339370728, "learning_rate": 3.966101191255586e-06, "loss": 0.5498, "step": 9282 }, { "epoch": 0.6535022879267863, "grad_norm": 1.8587230443954468, "learning_rate": 3.96466286757993e-06, "loss": 0.6087, "step": 9283 }, { "epoch": 0.6535726856740585, "grad_norm": 1.8281733989715576, "learning_rate": 3.963224701714197e-06, "loss": 0.58, "step": 9284 }, { "epoch": 0.6536430834213305, "grad_norm": 2.5593385696411133, "learning_rate": 3.961786693733165e-06, "loss": 0.6069, "step": 9285 }, { "epoch": 0.6537134811686026, "grad_norm": 1.7586474418640137, "learning_rate": 3.960348843711594e-06, "loss": 0.6699, "step": 9286 }, { "epoch": 0.6537838789158746, "grad_norm": 1.6561896800994873, "learning_rate": 3.958911151724241e-06, "loss": 0.6285, "step": 9287 }, { "epoch": 0.6538542766631468, "grad_norm": 1.8961546421051025, "learning_rate": 3.957473617845847e-06, "loss": 0.6882, "step": 9288 }, { "epoch": 0.6539246744104189, "grad_norm": 1.3032306432724, "learning_rate": 3.956036242151159e-06, "loss": 0.7141, "step": 9289 }, { "epoch": 0.6539950721576909, "grad_norm": 1.6677374839782715, "learning_rate": 3.954599024714899e-06, "loss": 0.6517, "step": 9290 }, { "epoch": 0.6540654699049631, "grad_norm": 1.8500113487243652, "learning_rate": 3.953161965611798e-06, "loss": 0.8063, "step": 9291 }, { "epoch": 0.6541358676522351, "grad_norm": 1.961794137954712, "learning_rate": 3.951725064916565e-06, "loss": 0.6605, "step": 9292 }, { "epoch": 0.6542062653995072, "grad_norm": 1.6367532014846802, "learning_rate": 3.950288322703907e-06, "loss": 0.7377, "step": 9293 }, { "epoch": 0.6542766631467793, "grad_norm": 2.065352439880371, "learning_rate": 3.948851739048519e-06, "loss": 0.6489, "step": 9294 }, { "epoch": 0.6543470608940514, "grad_norm": 1.6857280731201172, "learning_rate": 3.947415314025093e-06, "loss": 0.6695, "step": 9295 }, { "epoch": 0.6544174586413235, "grad_norm": 1.8349976539611816, "learning_rate": 3.945979047708309e-06, "loss": 0.6338, "step": 9296 }, { "epoch": 0.6544878563885955, "grad_norm": 1.5804296731948853, "learning_rate": 3.944542940172842e-06, "loss": 0.6281, "step": 9297 }, { "epoch": 0.6545582541358677, "grad_norm": 1.5988562107086182, "learning_rate": 3.943106991493355e-06, "loss": 0.6845, "step": 9298 }, { "epoch": 0.6546286518831398, "grad_norm": 1.7060246467590332, "learning_rate": 3.941671201744503e-06, "loss": 0.6488, "step": 9299 }, { "epoch": 0.6546990496304118, "grad_norm": 1.7776978015899658, "learning_rate": 3.940235571000933e-06, "loss": 0.6622, "step": 9300 }, { "epoch": 0.6547694473776839, "grad_norm": 2.334264039993286, "learning_rate": 3.93880009933729e-06, "loss": 0.6468, "step": 9301 }, { "epoch": 0.654839845124956, "grad_norm": 1.9047025442123413, "learning_rate": 3.9373647868282e-06, "loss": 0.7207, "step": 9302 }, { "epoch": 0.6549102428722281, "grad_norm": 1.7843750715255737, "learning_rate": 3.935929633548289e-06, "loss": 0.6959, "step": 9303 }, { "epoch": 0.6549806406195001, "grad_norm": 2.3160400390625, "learning_rate": 3.934494639572172e-06, "loss": 0.7093, "step": 9304 }, { "epoch": 0.6550510383667723, "grad_norm": 2.0542423725128174, "learning_rate": 3.933059804974456e-06, "loss": 0.7259, "step": 9305 }, { "epoch": 0.6551214361140444, "grad_norm": 1.8615144491195679, "learning_rate": 3.9316251298297354e-06, "loss": 0.6739, "step": 9306 }, { "epoch": 0.6551918338613164, "grad_norm": 1.9861087799072266, "learning_rate": 3.930190614212605e-06, "loss": 0.5984, "step": 9307 }, { "epoch": 0.6552622316085885, "grad_norm": 1.8594142198562622, "learning_rate": 3.928756258197643e-06, "loss": 0.5971, "step": 9308 }, { "epoch": 0.6553326293558606, "grad_norm": 1.7186833620071411, "learning_rate": 3.927322061859427e-06, "loss": 0.6712, "step": 9309 }, { "epoch": 0.6554030271031327, "grad_norm": 1.8496427536010742, "learning_rate": 3.925888025272519e-06, "loss": 0.6182, "step": 9310 }, { "epoch": 0.6554734248504048, "grad_norm": 1.968284010887146, "learning_rate": 3.924454148511478e-06, "loss": 0.6157, "step": 9311 }, { "epoch": 0.6555438225976769, "grad_norm": 1.9680858850479126, "learning_rate": 3.923020431650848e-06, "loss": 0.7107, "step": 9312 }, { "epoch": 0.655614220344949, "grad_norm": 1.6702308654785156, "learning_rate": 3.921586874765176e-06, "loss": 0.676, "step": 9313 }, { "epoch": 0.655684618092221, "grad_norm": 1.6679167747497559, "learning_rate": 3.920153477928986e-06, "loss": 0.5896, "step": 9314 }, { "epoch": 0.6557550158394931, "grad_norm": 2.0232794284820557, "learning_rate": 3.918720241216809e-06, "loss": 0.7058, "step": 9315 }, { "epoch": 0.6558254135867653, "grad_norm": 1.7558764219284058, "learning_rate": 3.917287164703158e-06, "loss": 0.7455, "step": 9316 }, { "epoch": 0.6558958113340373, "grad_norm": 1.791579246520996, "learning_rate": 3.9158542484625386e-06, "loss": 0.7082, "step": 9317 }, { "epoch": 0.6559662090813094, "grad_norm": 1.8052475452423096, "learning_rate": 3.9144214925694465e-06, "loss": 0.658, "step": 9318 }, { "epoch": 0.6560366068285814, "grad_norm": 1.7970119714736938, "learning_rate": 3.912988897098381e-06, "loss": 0.5835, "step": 9319 }, { "epoch": 0.6561070045758536, "grad_norm": 1.8704392910003662, "learning_rate": 3.911556462123812e-06, "loss": 0.754, "step": 9320 }, { "epoch": 0.6561774023231257, "grad_norm": 1.7874860763549805, "learning_rate": 3.910124187720224e-06, "loss": 0.6831, "step": 9321 }, { "epoch": 0.6562478000703977, "grad_norm": 1.7530848979949951, "learning_rate": 3.908692073962079e-06, "loss": 0.5845, "step": 9322 }, { "epoch": 0.6563181978176699, "grad_norm": 1.7472612857818604, "learning_rate": 3.907260120923831e-06, "loss": 0.5381, "step": 9323 }, { "epoch": 0.6563885955649419, "grad_norm": 1.931133508682251, "learning_rate": 3.905828328679929e-06, "loss": 0.7108, "step": 9324 }, { "epoch": 0.656458993312214, "grad_norm": 1.842126727104187, "learning_rate": 3.9043966973048154e-06, "loss": 0.6377, "step": 9325 }, { "epoch": 0.656529391059486, "grad_norm": 1.8766865730285645, "learning_rate": 3.90296522687292e-06, "loss": 0.624, "step": 9326 }, { "epoch": 0.6565997888067582, "grad_norm": 1.5321043729782104, "learning_rate": 3.901533917458669e-06, "loss": 0.6861, "step": 9327 }, { "epoch": 0.6566701865540303, "grad_norm": 1.8581103086471558, "learning_rate": 3.900102769136477e-06, "loss": 0.7645, "step": 9328 }, { "epoch": 0.6567405843013023, "grad_norm": 1.873976707458496, "learning_rate": 3.898671781980746e-06, "loss": 0.7283, "step": 9329 }, { "epoch": 0.6568109820485745, "grad_norm": 1.926349401473999, "learning_rate": 3.89724095606588e-06, "loss": 0.5764, "step": 9330 }, { "epoch": 0.6568813797958465, "grad_norm": 1.674834966659546, "learning_rate": 3.895810291466265e-06, "loss": 0.7369, "step": 9331 }, { "epoch": 0.6569517775431186, "grad_norm": 2.2116198539733887, "learning_rate": 3.894379788256285e-06, "loss": 0.7339, "step": 9332 }, { "epoch": 0.6570221752903908, "grad_norm": 1.9121167659759521, "learning_rate": 3.892949446510315e-06, "loss": 0.6056, "step": 9333 }, { "epoch": 0.6570925730376628, "grad_norm": 1.728078842163086, "learning_rate": 3.891519266302716e-06, "loss": 0.6296, "step": 9334 }, { "epoch": 0.6571629707849349, "grad_norm": 1.9227606058120728, "learning_rate": 3.8900892477078424e-06, "loss": 0.7269, "step": 9335 }, { "epoch": 0.6572333685322069, "grad_norm": 1.5952627658843994, "learning_rate": 3.888659390800048e-06, "loss": 0.6807, "step": 9336 }, { "epoch": 0.6573037662794791, "grad_norm": 1.7665531635284424, "learning_rate": 3.887229695653668e-06, "loss": 0.7226, "step": 9337 }, { "epoch": 0.6573741640267512, "grad_norm": 1.693566083908081, "learning_rate": 3.885800162343038e-06, "loss": 0.5786, "step": 9338 }, { "epoch": 0.6574445617740232, "grad_norm": 1.9455152750015259, "learning_rate": 3.884370790942474e-06, "loss": 0.6893, "step": 9339 }, { "epoch": 0.6575149595212954, "grad_norm": 1.8459147214889526, "learning_rate": 3.882941581526301e-06, "loss": 0.738, "step": 9340 }, { "epoch": 0.6575853572685674, "grad_norm": 1.68338942527771, "learning_rate": 3.881512534168811e-06, "loss": 0.6283, "step": 9341 }, { "epoch": 0.6576557550158395, "grad_norm": 1.9208943843841553, "learning_rate": 3.8800836489443125e-06, "loss": 0.6059, "step": 9342 }, { "epoch": 0.6577261527631115, "grad_norm": 2.097825050354004, "learning_rate": 3.878654925927087e-06, "loss": 0.6932, "step": 9343 }, { "epoch": 0.6577965505103837, "grad_norm": 2.5287721157073975, "learning_rate": 3.877226365191423e-06, "loss": 0.7499, "step": 9344 }, { "epoch": 0.6578669482576558, "grad_norm": 1.8156718015670776, "learning_rate": 3.875797966811585e-06, "loss": 0.6296, "step": 9345 }, { "epoch": 0.6579373460049278, "grad_norm": 2.035356044769287, "learning_rate": 3.874369730861846e-06, "loss": 0.618, "step": 9346 }, { "epoch": 0.6580077437522, "grad_norm": 1.8672561645507812, "learning_rate": 3.872941657416449e-06, "loss": 0.6737, "step": 9347 }, { "epoch": 0.658078141499472, "grad_norm": 1.7850664854049683, "learning_rate": 3.8715137465496505e-06, "loss": 0.6397, "step": 9348 }, { "epoch": 0.6581485392467441, "grad_norm": 2.1780202388763428, "learning_rate": 3.870085998335683e-06, "loss": 0.8194, "step": 9349 }, { "epoch": 0.6582189369940162, "grad_norm": 1.7338881492614746, "learning_rate": 3.868658412848782e-06, "loss": 0.7316, "step": 9350 }, { "epoch": 0.6582893347412883, "grad_norm": 1.994356632232666, "learning_rate": 3.867230990163163e-06, "loss": 0.6706, "step": 9351 }, { "epoch": 0.6583597324885604, "grad_norm": 1.8832377195358276, "learning_rate": 3.865803730353048e-06, "loss": 0.7154, "step": 9352 }, { "epoch": 0.6584301302358324, "grad_norm": 2.072601079940796, "learning_rate": 3.8643766334926285e-06, "loss": 0.6205, "step": 9353 }, { "epoch": 0.6585005279831045, "grad_norm": 1.9571412801742554, "learning_rate": 3.8629496996561124e-06, "loss": 0.6285, "step": 9354 }, { "epoch": 0.6585709257303767, "grad_norm": 2.2114007472991943, "learning_rate": 3.861522928917678e-06, "loss": 0.676, "step": 9355 }, { "epoch": 0.6586413234776487, "grad_norm": 1.747896671295166, "learning_rate": 3.860096321351512e-06, "loss": 0.6466, "step": 9356 }, { "epoch": 0.6587117212249208, "grad_norm": 1.8667545318603516, "learning_rate": 3.85866987703178e-06, "loss": 0.6468, "step": 9357 }, { "epoch": 0.6587821189721929, "grad_norm": 1.873183012008667, "learning_rate": 3.857243596032651e-06, "loss": 0.6143, "step": 9358 }, { "epoch": 0.658852516719465, "grad_norm": 1.8371046781539917, "learning_rate": 3.855817478428269e-06, "loss": 0.7148, "step": 9359 }, { "epoch": 0.6589229144667371, "grad_norm": 2.0181965827941895, "learning_rate": 3.854391524292785e-06, "loss": 0.7362, "step": 9360 }, { "epoch": 0.6589933122140091, "grad_norm": 1.776138424873352, "learning_rate": 3.852965733700332e-06, "loss": 0.6717, "step": 9361 }, { "epoch": 0.6590637099612813, "grad_norm": 1.8375993967056274, "learning_rate": 3.851540106725045e-06, "loss": 0.6491, "step": 9362 }, { "epoch": 0.6591341077085533, "grad_norm": 2.354966163635254, "learning_rate": 3.850114643441035e-06, "loss": 0.6855, "step": 9363 }, { "epoch": 0.6592045054558254, "grad_norm": 2.0186827182769775, "learning_rate": 3.848689343922424e-06, "loss": 0.563, "step": 9364 }, { "epoch": 0.6592749032030975, "grad_norm": 1.699950933456421, "learning_rate": 3.847264208243302e-06, "loss": 0.5164, "step": 9365 }, { "epoch": 0.6593453009503696, "grad_norm": 1.9686071872711182, "learning_rate": 3.845839236477772e-06, "loss": 0.7041, "step": 9366 }, { "epoch": 0.6594156986976417, "grad_norm": 1.9450112581253052, "learning_rate": 3.844414428699913e-06, "loss": 0.6658, "step": 9367 }, { "epoch": 0.6594860964449137, "grad_norm": 1.7091954946517944, "learning_rate": 3.84298978498381e-06, "loss": 0.6915, "step": 9368 }, { "epoch": 0.6595564941921859, "grad_norm": 1.781018614768982, "learning_rate": 3.841565305403523e-06, "loss": 0.7121, "step": 9369 }, { "epoch": 0.6596268919394579, "grad_norm": 2.0257205963134766, "learning_rate": 3.84014099003312e-06, "loss": 0.5844, "step": 9370 }, { "epoch": 0.65969728968673, "grad_norm": 1.6777924299240112, "learning_rate": 3.838716838946649e-06, "loss": 0.6722, "step": 9371 }, { "epoch": 0.6597676874340022, "grad_norm": 1.9068189859390259, "learning_rate": 3.837292852218151e-06, "loss": 0.7787, "step": 9372 }, { "epoch": 0.6598380851812742, "grad_norm": 1.8569881916046143, "learning_rate": 3.83586902992166e-06, "loss": 0.6965, "step": 9373 }, { "epoch": 0.6599084829285463, "grad_norm": 1.8083025217056274, "learning_rate": 3.834445372131208e-06, "loss": 0.6455, "step": 9374 }, { "epoch": 0.6599788806758183, "grad_norm": 2.1253786087036133, "learning_rate": 3.833021878920803e-06, "loss": 0.6295, "step": 9375 }, { "epoch": 0.6600492784230905, "grad_norm": 2.0754146575927734, "learning_rate": 3.831598550364462e-06, "loss": 0.6116, "step": 9376 }, { "epoch": 0.6601196761703626, "grad_norm": 1.8463486433029175, "learning_rate": 3.8301753865361825e-06, "loss": 0.6796, "step": 9377 }, { "epoch": 0.6601900739176346, "grad_norm": 2.059767007827759, "learning_rate": 3.828752387509952e-06, "loss": 0.6802, "step": 9378 }, { "epoch": 0.6602604716649068, "grad_norm": 1.8597520589828491, "learning_rate": 3.827329553359759e-06, "loss": 0.6336, "step": 9379 }, { "epoch": 0.6603308694121788, "grad_norm": 2.247623920440674, "learning_rate": 3.825906884159574e-06, "loss": 0.8309, "step": 9380 }, { "epoch": 0.6604012671594509, "grad_norm": 1.5921657085418701, "learning_rate": 3.824484379983368e-06, "loss": 0.5828, "step": 9381 }, { "epoch": 0.6604716649067229, "grad_norm": 1.774383544921875, "learning_rate": 3.823062040905096e-06, "loss": 0.6137, "step": 9382 }, { "epoch": 0.6605420626539951, "grad_norm": 2.2428903579711914, "learning_rate": 3.821639866998704e-06, "loss": 0.6702, "step": 9383 }, { "epoch": 0.6606124604012672, "grad_norm": 2.34417986869812, "learning_rate": 3.82021785833813e-06, "loss": 0.7328, "step": 9384 }, { "epoch": 0.6606828581485392, "grad_norm": 2.37935209274292, "learning_rate": 3.8187960149973134e-06, "loss": 0.6755, "step": 9385 }, { "epoch": 0.6607532558958114, "grad_norm": 1.5201268196105957, "learning_rate": 3.81737433705017e-06, "loss": 0.8158, "step": 9386 }, { "epoch": 0.6608236536430834, "grad_norm": 1.9414503574371338, "learning_rate": 3.81595282457062e-06, "loss": 0.6332, "step": 9387 }, { "epoch": 0.6608940513903555, "grad_norm": 2.269308567047119, "learning_rate": 3.814531477632567e-06, "loss": 0.7784, "step": 9388 }, { "epoch": 0.6609644491376276, "grad_norm": 2.037278175354004, "learning_rate": 3.8131102963099074e-06, "loss": 0.676, "step": 9389 }, { "epoch": 0.6610348468848997, "grad_norm": 1.9515653848648071, "learning_rate": 3.8116892806765264e-06, "loss": 0.6856, "step": 9390 }, { "epoch": 0.6611052446321718, "grad_norm": 1.9382226467132568, "learning_rate": 3.81026843080631e-06, "loss": 0.619, "step": 9391 }, { "epoch": 0.6611756423794438, "grad_norm": 1.9385818243026733, "learning_rate": 3.808847746773123e-06, "loss": 0.6896, "step": 9392 }, { "epoch": 0.661246040126716, "grad_norm": 1.860613465309143, "learning_rate": 3.807427228650836e-06, "loss": 0.6672, "step": 9393 }, { "epoch": 0.6613164378739881, "grad_norm": 1.765803337097168, "learning_rate": 3.8060068765132986e-06, "loss": 0.688, "step": 9394 }, { "epoch": 0.6613868356212601, "grad_norm": 2.1296679973602295, "learning_rate": 3.8045866904343553e-06, "loss": 0.5571, "step": 9395 }, { "epoch": 0.6614572333685322, "grad_norm": 1.6398696899414062, "learning_rate": 3.803166670487842e-06, "loss": 0.6189, "step": 9396 }, { "epoch": 0.6615276311158043, "grad_norm": 1.6248990297317505, "learning_rate": 3.8017468167475912e-06, "loss": 0.6881, "step": 9397 }, { "epoch": 0.6615980288630764, "grad_norm": 1.6861724853515625, "learning_rate": 3.8003271292874172e-06, "loss": 0.4796, "step": 9398 }, { "epoch": 0.6616684266103484, "grad_norm": 1.970700979232788, "learning_rate": 3.798907608181136e-06, "loss": 0.6016, "step": 9399 }, { "epoch": 0.6617388243576205, "grad_norm": 1.733167290687561, "learning_rate": 3.797488253502548e-06, "loss": 0.5123, "step": 9400 }, { "epoch": 0.6618092221048927, "grad_norm": 1.840609073638916, "learning_rate": 3.796069065325445e-06, "loss": 0.7049, "step": 9401 }, { "epoch": 0.6618796198521647, "grad_norm": 1.7726327180862427, "learning_rate": 3.7946500437236114e-06, "loss": 0.7589, "step": 9402 }, { "epoch": 0.6619500175994368, "grad_norm": 1.8357254266738892, "learning_rate": 3.793231188770827e-06, "loss": 0.6453, "step": 9403 }, { "epoch": 0.6620204153467089, "grad_norm": 2.0251801013946533, "learning_rate": 3.7918125005408546e-06, "loss": 0.7275, "step": 9404 }, { "epoch": 0.662090813093981, "grad_norm": 1.869101881980896, "learning_rate": 3.7903939791074584e-06, "loss": 0.5974, "step": 9405 }, { "epoch": 0.6621612108412531, "grad_norm": 1.8836418390274048, "learning_rate": 3.7889756245443865e-06, "loss": 0.6715, "step": 9406 }, { "epoch": 0.6622316085885251, "grad_norm": 2.020716905593872, "learning_rate": 3.78755743692538e-06, "loss": 0.7204, "step": 9407 }, { "epoch": 0.6623020063357973, "grad_norm": 1.8265376091003418, "learning_rate": 3.7861394163241683e-06, "loss": 0.7036, "step": 9408 }, { "epoch": 0.6623724040830693, "grad_norm": 1.682831883430481, "learning_rate": 3.784721562814482e-06, "loss": 0.688, "step": 9409 }, { "epoch": 0.6624428018303414, "grad_norm": 1.6723463535308838, "learning_rate": 3.7833038764700316e-06, "loss": 0.5509, "step": 9410 }, { "epoch": 0.6625131995776136, "grad_norm": 1.6346769332885742, "learning_rate": 3.7818863573645275e-06, "loss": 0.74, "step": 9411 }, { "epoch": 0.6625835973248856, "grad_norm": 1.6294368505477905, "learning_rate": 3.7804690055716665e-06, "loss": 0.6559, "step": 9412 }, { "epoch": 0.6626539950721577, "grad_norm": 2.148592472076416, "learning_rate": 3.7790518211651384e-06, "loss": 0.6743, "step": 9413 }, { "epoch": 0.6627243928194297, "grad_norm": 1.7098876237869263, "learning_rate": 3.7776348042186197e-06, "loss": 0.6661, "step": 9414 }, { "epoch": 0.6627947905667019, "grad_norm": 1.8687469959259033, "learning_rate": 3.7762179548057884e-06, "loss": 0.6908, "step": 9415 }, { "epoch": 0.662865188313974, "grad_norm": 1.7020469903945923, "learning_rate": 3.7748012730003034e-06, "loss": 0.6844, "step": 9416 }, { "epoch": 0.662935586061246, "grad_norm": 1.7845388650894165, "learning_rate": 3.7733847588758233e-06, "loss": 0.731, "step": 9417 }, { "epoch": 0.6630059838085182, "grad_norm": 1.5086592435836792, "learning_rate": 3.7719684125059915e-06, "loss": 0.6395, "step": 9418 }, { "epoch": 0.6630763815557902, "grad_norm": 1.7365214824676514, "learning_rate": 3.7705522339644463e-06, "loss": 0.6199, "step": 9419 }, { "epoch": 0.6631467793030623, "grad_norm": 1.5664703845977783, "learning_rate": 3.7691362233248116e-06, "loss": 0.6601, "step": 9420 }, { "epoch": 0.6632171770503343, "grad_norm": 1.9312852621078491, "learning_rate": 3.7677203806607134e-06, "loss": 0.592, "step": 9421 }, { "epoch": 0.6632875747976065, "grad_norm": 1.7388761043548584, "learning_rate": 3.7663047060457577e-06, "loss": 0.7163, "step": 9422 }, { "epoch": 0.6633579725448786, "grad_norm": 2.020277500152588, "learning_rate": 3.764889199553552e-06, "loss": 0.6825, "step": 9423 }, { "epoch": 0.6634283702921506, "grad_norm": 1.5706300735473633, "learning_rate": 3.763473861257686e-06, "loss": 0.4898, "step": 9424 }, { "epoch": 0.6634987680394228, "grad_norm": 1.9300932884216309, "learning_rate": 3.762058691231746e-06, "loss": 0.6349, "step": 9425 }, { "epoch": 0.6635691657866948, "grad_norm": 2.148848533630371, "learning_rate": 3.7606436895493034e-06, "loss": 0.7609, "step": 9426 }, { "epoch": 0.6636395635339669, "grad_norm": 2.1542716026306152, "learning_rate": 3.7592288562839326e-06, "loss": 0.6681, "step": 9427 }, { "epoch": 0.663709961281239, "grad_norm": 1.895219326019287, "learning_rate": 3.757814191509185e-06, "loss": 0.6336, "step": 9428 }, { "epoch": 0.6637803590285111, "grad_norm": 1.7701058387756348, "learning_rate": 3.756399695298617e-06, "loss": 0.5946, "step": 9429 }, { "epoch": 0.6638507567757832, "grad_norm": 1.8151096105575562, "learning_rate": 3.7549853677257666e-06, "loss": 0.7242, "step": 9430 }, { "epoch": 0.6639211545230552, "grad_norm": 1.7594743967056274, "learning_rate": 3.7535712088641626e-06, "loss": 0.6536, "step": 9431 }, { "epoch": 0.6639915522703274, "grad_norm": 1.5877035856246948, "learning_rate": 3.7521572187873356e-06, "loss": 0.6477, "step": 9432 }, { "epoch": 0.6640619500175995, "grad_norm": 1.8904672861099243, "learning_rate": 3.7507433975687916e-06, "loss": 0.7276, "step": 9433 }, { "epoch": 0.6641323477648715, "grad_norm": 2.0647292137145996, "learning_rate": 3.7493297452820455e-06, "loss": 0.6654, "step": 9434 }, { "epoch": 0.6642027455121436, "grad_norm": 1.8412766456604004, "learning_rate": 3.7479162620005887e-06, "loss": 0.6895, "step": 9435 }, { "epoch": 0.6642731432594157, "grad_norm": 1.7119956016540527, "learning_rate": 3.7465029477979116e-06, "loss": 0.5475, "step": 9436 }, { "epoch": 0.6643435410066878, "grad_norm": 2.0989201068878174, "learning_rate": 3.7450898027474896e-06, "loss": 0.7361, "step": 9437 }, { "epoch": 0.6644139387539598, "grad_norm": 1.9646848440170288, "learning_rate": 3.743676826922799e-06, "loss": 0.7195, "step": 9438 }, { "epoch": 0.664484336501232, "grad_norm": 2.0146331787109375, "learning_rate": 3.742264020397297e-06, "loss": 0.6212, "step": 9439 }, { "epoch": 0.6645547342485041, "grad_norm": 1.6136441230773926, "learning_rate": 3.740851383244441e-06, "loss": 0.6513, "step": 9440 }, { "epoch": 0.6646251319957761, "grad_norm": 1.706161379814148, "learning_rate": 3.739438915537674e-06, "loss": 0.5638, "step": 9441 }, { "epoch": 0.6646955297430482, "grad_norm": 1.7694684267044067, "learning_rate": 3.73802661735043e-06, "loss": 0.7085, "step": 9442 }, { "epoch": 0.6647659274903203, "grad_norm": 1.7655645608901978, "learning_rate": 3.7366144887561344e-06, "loss": 0.7057, "step": 9443 }, { "epoch": 0.6648363252375924, "grad_norm": 2.1455209255218506, "learning_rate": 3.7352025298282098e-06, "loss": 0.6754, "step": 9444 }, { "epoch": 0.6649067229848645, "grad_norm": 1.7925257682800293, "learning_rate": 3.7337907406400596e-06, "loss": 0.7276, "step": 9445 }, { "epoch": 0.6649771207321366, "grad_norm": 1.767127513885498, "learning_rate": 3.73237912126509e-06, "loss": 0.6557, "step": 9446 }, { "epoch": 0.6650475184794087, "grad_norm": 1.6553949117660522, "learning_rate": 3.730967671776685e-06, "loss": 0.672, "step": 9447 }, { "epoch": 0.6651179162266807, "grad_norm": 1.9977774620056152, "learning_rate": 3.7295563922482383e-06, "loss": 0.6976, "step": 9448 }, { "epoch": 0.6651883139739528, "grad_norm": 2.2203333377838135, "learning_rate": 3.72814528275311e-06, "loss": 0.8528, "step": 9449 }, { "epoch": 0.665258711721225, "grad_norm": 1.574150800704956, "learning_rate": 3.726734343364675e-06, "loss": 0.7574, "step": 9450 }, { "epoch": 0.665329109468497, "grad_norm": 1.5870815515518188, "learning_rate": 3.725323574156283e-06, "loss": 0.73, "step": 9451 }, { "epoch": 0.6653995072157691, "grad_norm": 1.824533462524414, "learning_rate": 3.7239129752012874e-06, "loss": 0.6344, "step": 9452 }, { "epoch": 0.6654699049630411, "grad_norm": 1.805981993675232, "learning_rate": 3.7225025465730195e-06, "loss": 0.6628, "step": 9453 }, { "epoch": 0.6655403027103133, "grad_norm": 1.9066455364227295, "learning_rate": 3.7210922883448193e-06, "loss": 0.6215, "step": 9454 }, { "epoch": 0.6656107004575853, "grad_norm": 2.1212494373321533, "learning_rate": 3.719682200589994e-06, "loss": 0.7001, "step": 9455 }, { "epoch": 0.6656810982048574, "grad_norm": 1.9196668863296509, "learning_rate": 3.7182722833818653e-06, "loss": 0.6521, "step": 9456 }, { "epoch": 0.6657514959521296, "grad_norm": 2.0645368099212646, "learning_rate": 3.71686253679373e-06, "loss": 0.6809, "step": 9457 }, { "epoch": 0.6658218936994016, "grad_norm": 1.5834754705429077, "learning_rate": 3.715452960898887e-06, "loss": 0.5932, "step": 9458 }, { "epoch": 0.6658922914466737, "grad_norm": 1.7434378862380981, "learning_rate": 3.7140435557706167e-06, "loss": 0.7071, "step": 9459 }, { "epoch": 0.6659626891939457, "grad_norm": 1.925760269165039, "learning_rate": 3.712634321482203e-06, "loss": 0.7043, "step": 9460 }, { "epoch": 0.6660330869412179, "grad_norm": 1.6326918601989746, "learning_rate": 3.7112252581069033e-06, "loss": 0.7072, "step": 9461 }, { "epoch": 0.66610348468849, "grad_norm": 1.5848122835159302, "learning_rate": 3.7098163657179824e-06, "loss": 0.7213, "step": 9462 }, { "epoch": 0.666173882435762, "grad_norm": 2.198622465133667, "learning_rate": 3.7084076443886875e-06, "loss": 0.6907, "step": 9463 }, { "epoch": 0.6662442801830342, "grad_norm": 1.8447812795639038, "learning_rate": 3.7069990941922622e-06, "loss": 0.6861, "step": 9464 }, { "epoch": 0.6663146779303062, "grad_norm": 1.9498350620269775, "learning_rate": 3.7055907152019335e-06, "loss": 0.6605, "step": 9465 }, { "epoch": 0.6663850756775783, "grad_norm": 2.246990203857422, "learning_rate": 3.7041825074909325e-06, "loss": 0.7552, "step": 9466 }, { "epoch": 0.6664554734248505, "grad_norm": 2.06561541557312, "learning_rate": 3.702774471132462e-06, "loss": 0.6062, "step": 9467 }, { "epoch": 0.6665258711721225, "grad_norm": 1.8024877309799194, "learning_rate": 3.701366606199736e-06, "loss": 0.62, "step": 9468 }, { "epoch": 0.6665962689193946, "grad_norm": 1.9804096221923828, "learning_rate": 3.6999589127659445e-06, "loss": 0.6381, "step": 9469 }, { "epoch": 0.6666666666666666, "grad_norm": 1.9676563739776611, "learning_rate": 3.6985513909042815e-06, "loss": 0.5617, "step": 9470 }, { "epoch": 0.6667370644139388, "grad_norm": 2.255150556564331, "learning_rate": 3.697144040687918e-06, "loss": 0.6844, "step": 9471 }, { "epoch": 0.6668074621612109, "grad_norm": 2.004815101623535, "learning_rate": 3.6957368621900332e-06, "loss": 0.6601, "step": 9472 }, { "epoch": 0.6668778599084829, "grad_norm": 1.783914566040039, "learning_rate": 3.694329855483775e-06, "loss": 0.754, "step": 9473 }, { "epoch": 0.666948257655755, "grad_norm": 1.7357683181762695, "learning_rate": 3.692923020642305e-06, "loss": 0.6352, "step": 9474 }, { "epoch": 0.6670186554030271, "grad_norm": 2.2291619777679443, "learning_rate": 3.6915163577387594e-06, "loss": 0.6928, "step": 9475 }, { "epoch": 0.6670890531502992, "grad_norm": 1.7138569355010986, "learning_rate": 3.690109866846277e-06, "loss": 0.6203, "step": 9476 }, { "epoch": 0.6671594508975712, "grad_norm": 1.8721575736999512, "learning_rate": 3.6887035480379772e-06, "loss": 0.6842, "step": 9477 }, { "epoch": 0.6672298486448434, "grad_norm": 1.749845027923584, "learning_rate": 3.687297401386983e-06, "loss": 0.6819, "step": 9478 }, { "epoch": 0.6673002463921155, "grad_norm": 2.062042713165283, "learning_rate": 3.685891426966396e-06, "loss": 0.5885, "step": 9479 }, { "epoch": 0.6673706441393875, "grad_norm": 1.974887728691101, "learning_rate": 3.6844856248493123e-06, "loss": 0.7419, "step": 9480 }, { "epoch": 0.6674410418866596, "grad_norm": 1.9293524026870728, "learning_rate": 3.683079995108827e-06, "loss": 0.6923, "step": 9481 }, { "epoch": 0.6675114396339317, "grad_norm": 2.1494898796081543, "learning_rate": 3.681674537818014e-06, "loss": 0.762, "step": 9482 }, { "epoch": 0.6675818373812038, "grad_norm": 1.9716849327087402, "learning_rate": 3.680269253049949e-06, "loss": 0.6586, "step": 9483 }, { "epoch": 0.6676522351284759, "grad_norm": 1.6840028762817383, "learning_rate": 3.678864140877693e-06, "loss": 0.8098, "step": 9484 }, { "epoch": 0.667722632875748, "grad_norm": 1.8613474369049072, "learning_rate": 3.6774592013742976e-06, "loss": 0.6336, "step": 9485 }, { "epoch": 0.6677930306230201, "grad_norm": 2.0760087966918945, "learning_rate": 3.676054434612804e-06, "loss": 0.5454, "step": 9486 }, { "epoch": 0.6678634283702921, "grad_norm": 1.9296091794967651, "learning_rate": 3.6746498406662544e-06, "loss": 0.6302, "step": 9487 }, { "epoch": 0.6679338261175642, "grad_norm": 1.5954692363739014, "learning_rate": 3.6732454196076686e-06, "loss": 0.5939, "step": 9488 }, { "epoch": 0.6680042238648364, "grad_norm": 1.4578001499176025, "learning_rate": 3.671841171510068e-06, "loss": 0.834, "step": 9489 }, { "epoch": 0.6680746216121084, "grad_norm": 1.9700286388397217, "learning_rate": 3.6704370964464604e-06, "loss": 0.7237, "step": 9490 }, { "epoch": 0.6681450193593805, "grad_norm": 1.9430780410766602, "learning_rate": 3.669033194489842e-06, "loss": 0.6651, "step": 9491 }, { "epoch": 0.6682154171066526, "grad_norm": 1.8464229106903076, "learning_rate": 3.6676294657132033e-06, "loss": 0.5712, "step": 9492 }, { "epoch": 0.6682858148539247, "grad_norm": 1.7033932209014893, "learning_rate": 3.666225910189529e-06, "loss": 0.6762, "step": 9493 }, { "epoch": 0.6683562126011967, "grad_norm": 1.6306568384170532, "learning_rate": 3.6648225279917855e-06, "loss": 0.7623, "step": 9494 }, { "epoch": 0.6684266103484688, "grad_norm": 1.611977458000183, "learning_rate": 3.6634193191929425e-06, "loss": 0.6986, "step": 9495 }, { "epoch": 0.668497008095741, "grad_norm": 1.513511300086975, "learning_rate": 3.662016283865951e-06, "loss": 0.734, "step": 9496 }, { "epoch": 0.668567405843013, "grad_norm": 1.7697981595993042, "learning_rate": 3.660613422083756e-06, "loss": 0.6212, "step": 9497 }, { "epoch": 0.6686378035902851, "grad_norm": 1.6116514205932617, "learning_rate": 3.6592107339192894e-06, "loss": 0.7491, "step": 9498 }, { "epoch": 0.6687082013375572, "grad_norm": 1.8408095836639404, "learning_rate": 3.6578082194454866e-06, "loss": 0.7408, "step": 9499 }, { "epoch": 0.6687785990848293, "grad_norm": 2.066254138946533, "learning_rate": 3.6564058787352583e-06, "loss": 0.8014, "step": 9500 }, { "epoch": 0.6688489968321014, "grad_norm": 2.787003993988037, "learning_rate": 3.6550037118615195e-06, "loss": 0.6655, "step": 9501 }, { "epoch": 0.6689193945793734, "grad_norm": 1.6057971715927124, "learning_rate": 3.6536017188971673e-06, "loss": 0.7232, "step": 9502 }, { "epoch": 0.6689897923266456, "grad_norm": 1.6754474639892578, "learning_rate": 3.6521998999150913e-06, "loss": 0.5428, "step": 9503 }, { "epoch": 0.6690601900739176, "grad_norm": 1.8186367750167847, "learning_rate": 3.650798254988173e-06, "loss": 0.5656, "step": 9504 }, { "epoch": 0.6691305878211897, "grad_norm": 1.7936228513717651, "learning_rate": 3.649396784189288e-06, "loss": 0.6159, "step": 9505 }, { "epoch": 0.6692009855684619, "grad_norm": 2.2544877529144287, "learning_rate": 3.6479954875912966e-06, "loss": 0.6732, "step": 9506 }, { "epoch": 0.6692713833157339, "grad_norm": 2.093235731124878, "learning_rate": 3.646594365267058e-06, "loss": 0.6031, "step": 9507 }, { "epoch": 0.669341781063006, "grad_norm": 1.6977181434631348, "learning_rate": 3.645193417289416e-06, "loss": 0.7282, "step": 9508 }, { "epoch": 0.669412178810278, "grad_norm": 1.627936601638794, "learning_rate": 3.643792643731206e-06, "loss": 0.6186, "step": 9509 }, { "epoch": 0.6694825765575502, "grad_norm": 1.8520921468734741, "learning_rate": 3.642392044665254e-06, "loss": 0.6344, "step": 9510 }, { "epoch": 0.6695529743048222, "grad_norm": 1.7636120319366455, "learning_rate": 3.640991620164382e-06, "loss": 0.6929, "step": 9511 }, { "epoch": 0.6696233720520943, "grad_norm": 1.7518593072891235, "learning_rate": 3.639591370301396e-06, "loss": 0.5579, "step": 9512 }, { "epoch": 0.6696937697993665, "grad_norm": 1.8623628616333008, "learning_rate": 3.6381912951491003e-06, "loss": 0.6431, "step": 9513 }, { "epoch": 0.6697641675466385, "grad_norm": 1.692836046218872, "learning_rate": 3.6367913947802838e-06, "loss": 0.6742, "step": 9514 }, { "epoch": 0.6698345652939106, "grad_norm": 1.660165548324585, "learning_rate": 3.635391669267729e-06, "loss": 0.6749, "step": 9515 }, { "epoch": 0.6699049630411826, "grad_norm": 1.7880569696426392, "learning_rate": 3.6339921186842055e-06, "loss": 0.6573, "step": 9516 }, { "epoch": 0.6699753607884548, "grad_norm": 1.8317365646362305, "learning_rate": 3.632592743102483e-06, "loss": 0.6833, "step": 9517 }, { "epoch": 0.6700457585357269, "grad_norm": 1.6970726251602173, "learning_rate": 3.631193542595311e-06, "loss": 0.7091, "step": 9518 }, { "epoch": 0.6701161562829989, "grad_norm": 1.8068472146987915, "learning_rate": 3.62979451723544e-06, "loss": 0.5906, "step": 9519 }, { "epoch": 0.670186554030271, "grad_norm": 1.910352349281311, "learning_rate": 3.6283956670956037e-06, "loss": 0.6304, "step": 9520 }, { "epoch": 0.6702569517775431, "grad_norm": 1.8509044647216797, "learning_rate": 3.6269969922485303e-06, "loss": 0.6171, "step": 9521 }, { "epoch": 0.6703273495248152, "grad_norm": 1.9555388689041138, "learning_rate": 3.6255984927669363e-06, "loss": 0.5205, "step": 9522 }, { "epoch": 0.6703977472720873, "grad_norm": 1.9409325122833252, "learning_rate": 3.624200168723535e-06, "loss": 0.6705, "step": 9523 }, { "epoch": 0.6704681450193594, "grad_norm": 1.757805585861206, "learning_rate": 3.622802020191022e-06, "loss": 0.8119, "step": 9524 }, { "epoch": 0.6705385427666315, "grad_norm": 4.313814640045166, "learning_rate": 3.6214040472420923e-06, "loss": 0.6612, "step": 9525 }, { "epoch": 0.6706089405139035, "grad_norm": 1.7196035385131836, "learning_rate": 3.6200062499494267e-06, "loss": 0.6445, "step": 9526 }, { "epoch": 0.6706793382611757, "grad_norm": 1.7752717733383179, "learning_rate": 3.6186086283856972e-06, "loss": 0.7024, "step": 9527 }, { "epoch": 0.6707497360084478, "grad_norm": 1.9368467330932617, "learning_rate": 3.6172111826235645e-06, "loss": 0.7656, "step": 9528 }, { "epoch": 0.6708201337557198, "grad_norm": 1.908431887626648, "learning_rate": 3.615813912735686e-06, "loss": 0.6182, "step": 9529 }, { "epoch": 0.6708905315029919, "grad_norm": 1.8805185556411743, "learning_rate": 3.6144168187947103e-06, "loss": 0.6937, "step": 9530 }, { "epoch": 0.670960929250264, "grad_norm": 1.5054733753204346, "learning_rate": 3.6130199008732695e-06, "loss": 0.6798, "step": 9531 }, { "epoch": 0.6710313269975361, "grad_norm": 1.8607593774795532, "learning_rate": 3.6116231590439916e-06, "loss": 0.7082, "step": 9532 }, { "epoch": 0.6711017247448081, "grad_norm": 1.6252061128616333, "learning_rate": 3.6102265933794928e-06, "loss": 0.5119, "step": 9533 }, { "epoch": 0.6711721224920802, "grad_norm": 1.8360724449157715, "learning_rate": 3.6088302039523853e-06, "loss": 0.641, "step": 9534 }, { "epoch": 0.6712425202393524, "grad_norm": 1.9635742902755737, "learning_rate": 3.607433990835264e-06, "loss": 0.6628, "step": 9535 }, { "epoch": 0.6713129179866244, "grad_norm": 2.2618408203125, "learning_rate": 3.6060379541007246e-06, "loss": 0.6885, "step": 9536 }, { "epoch": 0.6713833157338965, "grad_norm": 2.410571336746216, "learning_rate": 3.6046420938213457e-06, "loss": 0.688, "step": 9537 }, { "epoch": 0.6714537134811686, "grad_norm": 1.7200067043304443, "learning_rate": 3.6032464100696995e-06, "loss": 0.6093, "step": 9538 }, { "epoch": 0.6715241112284407, "grad_norm": 2.1232666969299316, "learning_rate": 3.601850902918346e-06, "loss": 0.6745, "step": 9539 }, { "epoch": 0.6715945089757128, "grad_norm": 1.7412270307540894, "learning_rate": 3.6004555724398446e-06, "loss": 0.7165, "step": 9540 }, { "epoch": 0.6716649067229848, "grad_norm": 1.8341511487960815, "learning_rate": 3.5990604187067336e-06, "loss": 0.6809, "step": 9541 }, { "epoch": 0.671735304470257, "grad_norm": 1.9009373188018799, "learning_rate": 3.5976654417915546e-06, "loss": 0.6056, "step": 9542 }, { "epoch": 0.671805702217529, "grad_norm": 1.619065761566162, "learning_rate": 3.5962706417668307e-06, "loss": 0.6084, "step": 9543 }, { "epoch": 0.6718760999648011, "grad_norm": 1.690170168876648, "learning_rate": 3.5948760187050784e-06, "loss": 0.6679, "step": 9544 }, { "epoch": 0.6719464977120733, "grad_norm": 2.2986021041870117, "learning_rate": 3.5934815726788034e-06, "loss": 0.6715, "step": 9545 }, { "epoch": 0.6720168954593453, "grad_norm": 1.7641257047653198, "learning_rate": 3.592087303760509e-06, "loss": 0.6538, "step": 9546 }, { "epoch": 0.6720872932066174, "grad_norm": 1.7836750745773315, "learning_rate": 3.590693212022679e-06, "loss": 0.614, "step": 9547 }, { "epoch": 0.6721576909538894, "grad_norm": 1.4622468948364258, "learning_rate": 3.5892992975378e-06, "loss": 0.6495, "step": 9548 }, { "epoch": 0.6722280887011616, "grad_norm": 1.8348771333694458, "learning_rate": 3.587905560378339e-06, "loss": 0.674, "step": 9549 }, { "epoch": 0.6722984864484336, "grad_norm": 1.719138741493225, "learning_rate": 3.586512000616758e-06, "loss": 0.59, "step": 9550 }, { "epoch": 0.6723688841957057, "grad_norm": 2.1608736515045166, "learning_rate": 3.5851186183255065e-06, "loss": 0.6208, "step": 9551 }, { "epoch": 0.6724392819429779, "grad_norm": 1.814595103263855, "learning_rate": 3.583725413577034e-06, "loss": 0.5974, "step": 9552 }, { "epoch": 0.6725096796902499, "grad_norm": 1.994049072265625, "learning_rate": 3.5823323864437686e-06, "loss": 0.6678, "step": 9553 }, { "epoch": 0.672580077437522, "grad_norm": 1.7073501348495483, "learning_rate": 3.5809395369981395e-06, "loss": 0.5204, "step": 9554 }, { "epoch": 0.672650475184794, "grad_norm": 2.0833985805511475, "learning_rate": 3.5795468653125592e-06, "loss": 0.8357, "step": 9555 }, { "epoch": 0.6727208729320662, "grad_norm": 1.846512794494629, "learning_rate": 3.57815437145944e-06, "loss": 0.6764, "step": 9556 }, { "epoch": 0.6727912706793383, "grad_norm": 2.240967035293579, "learning_rate": 3.5767620555111673e-06, "loss": 0.6787, "step": 9557 }, { "epoch": 0.6728616684266103, "grad_norm": 1.6403827667236328, "learning_rate": 3.57536991754014e-06, "loss": 0.7156, "step": 9558 }, { "epoch": 0.6729320661738825, "grad_norm": 1.5792381763458252, "learning_rate": 3.5739779576187283e-06, "loss": 0.6447, "step": 9559 }, { "epoch": 0.6730024639211545, "grad_norm": 2.1986870765686035, "learning_rate": 3.5725861758193085e-06, "loss": 0.6905, "step": 9560 }, { "epoch": 0.6730728616684266, "grad_norm": 1.9966109991073608, "learning_rate": 3.5711945722142354e-06, "loss": 0.572, "step": 9561 }, { "epoch": 0.6731432594156987, "grad_norm": 1.5639630556106567, "learning_rate": 3.569803146875866e-06, "loss": 0.6173, "step": 9562 }, { "epoch": 0.6732136571629708, "grad_norm": 1.6846318244934082, "learning_rate": 3.568411899876533e-06, "loss": 0.5936, "step": 9563 }, { "epoch": 0.6732840549102429, "grad_norm": 1.6655956506729126, "learning_rate": 3.5670208312885754e-06, "loss": 0.6481, "step": 9564 }, { "epoch": 0.6733544526575149, "grad_norm": 1.6137974262237549, "learning_rate": 3.565629941184312e-06, "loss": 0.6987, "step": 9565 }, { "epoch": 0.6734248504047871, "grad_norm": 2.266724109649658, "learning_rate": 3.5642392296360603e-06, "loss": 0.6172, "step": 9566 }, { "epoch": 0.6734952481520592, "grad_norm": 1.5917441844940186, "learning_rate": 3.5628486967161194e-06, "loss": 0.6102, "step": 9567 }, { "epoch": 0.6735656458993312, "grad_norm": 1.7653508186340332, "learning_rate": 3.5614583424967946e-06, "loss": 0.6588, "step": 9568 }, { "epoch": 0.6736360436466033, "grad_norm": 2.4729971885681152, "learning_rate": 3.5600681670503576e-06, "loss": 0.6465, "step": 9569 }, { "epoch": 0.6737064413938754, "grad_norm": 1.6277023553848267, "learning_rate": 3.5586781704490957e-06, "loss": 0.643, "step": 9570 }, { "epoch": 0.6737768391411475, "grad_norm": 1.7675042152404785, "learning_rate": 3.55728835276527e-06, "loss": 0.6329, "step": 9571 }, { "epoch": 0.6738472368884195, "grad_norm": 1.6642038822174072, "learning_rate": 3.5558987140711426e-06, "loss": 0.6726, "step": 9572 }, { "epoch": 0.6739176346356917, "grad_norm": 1.9192132949829102, "learning_rate": 3.5545092544389574e-06, "loss": 0.7024, "step": 9573 }, { "epoch": 0.6739880323829638, "grad_norm": 1.8174347877502441, "learning_rate": 3.553119973940963e-06, "loss": 0.5683, "step": 9574 }, { "epoch": 0.6740584301302358, "grad_norm": 1.9017715454101562, "learning_rate": 3.5517308726493765e-06, "loss": 0.676, "step": 9575 }, { "epoch": 0.6741288278775079, "grad_norm": 1.959851861000061, "learning_rate": 3.5503419506364276e-06, "loss": 0.6248, "step": 9576 }, { "epoch": 0.67419922562478, "grad_norm": 1.7759286165237427, "learning_rate": 3.5489532079743225e-06, "loss": 0.6489, "step": 9577 }, { "epoch": 0.6742696233720521, "grad_norm": 1.7180191278457642, "learning_rate": 3.5475646447352684e-06, "loss": 0.6015, "step": 9578 }, { "epoch": 0.6743400211193242, "grad_norm": 2.043351411819458, "learning_rate": 3.546176260991452e-06, "loss": 0.6129, "step": 9579 }, { "epoch": 0.6744104188665963, "grad_norm": 2.4942402839660645, "learning_rate": 3.5447880568150623e-06, "loss": 0.7096, "step": 9580 }, { "epoch": 0.6744808166138684, "grad_norm": 1.7417913675308228, "learning_rate": 3.5434000322782714e-06, "loss": 0.6306, "step": 9581 }, { "epoch": 0.6745512143611404, "grad_norm": 1.631111741065979, "learning_rate": 3.54201218745324e-06, "loss": 0.6692, "step": 9582 }, { "epoch": 0.6746216121084125, "grad_norm": 1.8886805772781372, "learning_rate": 3.5406245224121296e-06, "loss": 0.6756, "step": 9583 }, { "epoch": 0.6746920098556847, "grad_norm": 2.2180871963500977, "learning_rate": 3.5392370372270814e-06, "loss": 0.6954, "step": 9584 }, { "epoch": 0.6747624076029567, "grad_norm": 1.5210446119308472, "learning_rate": 3.5378497319702355e-06, "loss": 0.6014, "step": 9585 }, { "epoch": 0.6748328053502288, "grad_norm": 1.8090317249298096, "learning_rate": 3.5364626067137185e-06, "loss": 0.6126, "step": 9586 }, { "epoch": 0.6749032030975008, "grad_norm": 1.8884564638137817, "learning_rate": 3.535075661529646e-06, "loss": 0.5968, "step": 9587 }, { "epoch": 0.674973600844773, "grad_norm": 1.5763615369796753, "learning_rate": 3.533688896490126e-06, "loss": 0.5913, "step": 9588 }, { "epoch": 0.675043998592045, "grad_norm": 1.7451555728912354, "learning_rate": 3.532302311667262e-06, "loss": 0.7158, "step": 9589 }, { "epoch": 0.6751143963393171, "grad_norm": 1.7790619134902954, "learning_rate": 3.5309159071331393e-06, "loss": 0.6961, "step": 9590 }, { "epoch": 0.6751847940865893, "grad_norm": 1.7640100717544556, "learning_rate": 3.5295296829598426e-06, "loss": 0.681, "step": 9591 }, { "epoch": 0.6752551918338613, "grad_norm": 1.6927580833435059, "learning_rate": 3.5281436392194406e-06, "loss": 0.6427, "step": 9592 }, { "epoch": 0.6753255895811334, "grad_norm": 2.0235867500305176, "learning_rate": 3.526757775983995e-06, "loss": 0.623, "step": 9593 }, { "epoch": 0.6753959873284054, "grad_norm": 1.9084289073944092, "learning_rate": 3.5253720933255555e-06, "loss": 0.6644, "step": 9594 }, { "epoch": 0.6754663850756776, "grad_norm": 1.7594949007034302, "learning_rate": 3.523986591316171e-06, "loss": 0.7612, "step": 9595 }, { "epoch": 0.6755367828229497, "grad_norm": 2.141514539718628, "learning_rate": 3.5226012700278688e-06, "loss": 0.7439, "step": 9596 }, { "epoch": 0.6756071805702217, "grad_norm": 1.9407038688659668, "learning_rate": 3.521216129532678e-06, "loss": 0.6718, "step": 9597 }, { "epoch": 0.6756775783174939, "grad_norm": 2.1379191875457764, "learning_rate": 3.5198311699026115e-06, "loss": 0.7543, "step": 9598 }, { "epoch": 0.6757479760647659, "grad_norm": 1.8247119188308716, "learning_rate": 3.5184463912096744e-06, "loss": 0.6819, "step": 9599 }, { "epoch": 0.675818373812038, "grad_norm": 1.7505089044570923, "learning_rate": 3.51706179352586e-06, "loss": 0.6486, "step": 9600 }, { "epoch": 0.6758887715593102, "grad_norm": 1.8196829557418823, "learning_rate": 3.5156773769231598e-06, "loss": 0.8067, "step": 9601 }, { "epoch": 0.6759591693065822, "grad_norm": 2.3212599754333496, "learning_rate": 3.514293141473546e-06, "loss": 0.7312, "step": 9602 }, { "epoch": 0.6760295670538543, "grad_norm": 1.6652593612670898, "learning_rate": 3.5129090872489903e-06, "loss": 0.6489, "step": 9603 }, { "epoch": 0.6760999648011263, "grad_norm": 1.572129726409912, "learning_rate": 3.5115252143214502e-06, "loss": 0.7139, "step": 9604 }, { "epoch": 0.6761703625483985, "grad_norm": 2.000436305999756, "learning_rate": 3.510141522762873e-06, "loss": 0.7092, "step": 9605 }, { "epoch": 0.6762407602956705, "grad_norm": 1.6774673461914062, "learning_rate": 3.508758012645196e-06, "loss": 0.6132, "step": 9606 }, { "epoch": 0.6763111580429426, "grad_norm": 1.796303391456604, "learning_rate": 3.507374684040355e-06, "loss": 0.635, "step": 9607 }, { "epoch": 0.6763815557902148, "grad_norm": 1.9706754684448242, "learning_rate": 3.5059915370202645e-06, "loss": 0.5958, "step": 9608 }, { "epoch": 0.6764519535374868, "grad_norm": 1.7144855260849, "learning_rate": 3.5046085716568403e-06, "loss": 0.6136, "step": 9609 }, { "epoch": 0.6765223512847589, "grad_norm": 1.8843894004821777, "learning_rate": 3.5032257880219828e-06, "loss": 0.7056, "step": 9610 }, { "epoch": 0.6765927490320309, "grad_norm": 1.7151457071304321, "learning_rate": 3.501843186187583e-06, "loss": 0.6977, "step": 9611 }, { "epoch": 0.6766631467793031, "grad_norm": 1.6163272857666016, "learning_rate": 3.500460766225521e-06, "loss": 0.7208, "step": 9612 }, { "epoch": 0.6767335445265752, "grad_norm": 2.081005573272705, "learning_rate": 3.499078528207677e-06, "loss": 0.6829, "step": 9613 }, { "epoch": 0.6768039422738472, "grad_norm": 1.6378560066223145, "learning_rate": 3.4976964722059077e-06, "loss": 0.6065, "step": 9614 }, { "epoch": 0.6768743400211193, "grad_norm": 2.2524969577789307, "learning_rate": 3.4963145982920727e-06, "loss": 0.7096, "step": 9615 }, { "epoch": 0.6769447377683914, "grad_norm": 1.9056650400161743, "learning_rate": 3.494932906538016e-06, "loss": 0.6171, "step": 9616 }, { "epoch": 0.6770151355156635, "grad_norm": 1.7184414863586426, "learning_rate": 3.493551397015571e-06, "loss": 0.7057, "step": 9617 }, { "epoch": 0.6770855332629356, "grad_norm": 2.5760021209716797, "learning_rate": 3.492170069796563e-06, "loss": 0.5984, "step": 9618 }, { "epoch": 0.6771559310102077, "grad_norm": 1.7121531963348389, "learning_rate": 3.490788924952811e-06, "loss": 0.6561, "step": 9619 }, { "epoch": 0.6772263287574798, "grad_norm": 1.803931713104248, "learning_rate": 3.489407962556119e-06, "loss": 0.6112, "step": 9620 }, { "epoch": 0.6772967265047518, "grad_norm": 1.6917119026184082, "learning_rate": 3.488027182678289e-06, "loss": 0.6877, "step": 9621 }, { "epoch": 0.6773671242520239, "grad_norm": 2.1594979763031006, "learning_rate": 3.486646585391107e-06, "loss": 0.6299, "step": 9622 }, { "epoch": 0.6774375219992961, "grad_norm": 3.835920572280884, "learning_rate": 3.48526617076635e-06, "loss": 0.6214, "step": 9623 }, { "epoch": 0.6775079197465681, "grad_norm": 1.6313978433609009, "learning_rate": 3.483885938875786e-06, "loss": 0.5816, "step": 9624 }, { "epoch": 0.6775783174938402, "grad_norm": 1.709115982055664, "learning_rate": 3.482505889791179e-06, "loss": 0.6439, "step": 9625 }, { "epoch": 0.6776487152411123, "grad_norm": 1.9716651439666748, "learning_rate": 3.4811260235842737e-06, "loss": 0.5988, "step": 9626 }, { "epoch": 0.6777191129883844, "grad_norm": 1.5748286247253418, "learning_rate": 3.4797463403268157e-06, "loss": 0.6174, "step": 9627 }, { "epoch": 0.6777895107356564, "grad_norm": 1.9641507863998413, "learning_rate": 3.478366840090533e-06, "loss": 0.6879, "step": 9628 }, { "epoch": 0.6778599084829285, "grad_norm": 1.9443857669830322, "learning_rate": 3.4769875229471486e-06, "loss": 0.6406, "step": 9629 }, { "epoch": 0.6779303062302007, "grad_norm": 2.7079031467437744, "learning_rate": 3.4756083889683706e-06, "loss": 0.7148, "step": 9630 }, { "epoch": 0.6780007039774727, "grad_norm": 1.655976414680481, "learning_rate": 3.474229438225904e-06, "loss": 0.5471, "step": 9631 }, { "epoch": 0.6780711017247448, "grad_norm": 1.9969831705093384, "learning_rate": 3.472850670791445e-06, "loss": 0.7403, "step": 9632 }, { "epoch": 0.6781414994720169, "grad_norm": 1.840749740600586, "learning_rate": 3.471472086736674e-06, "loss": 0.6711, "step": 9633 }, { "epoch": 0.678211897219289, "grad_norm": 1.9854642152786255, "learning_rate": 3.4700936861332656e-06, "loss": 0.6954, "step": 9634 }, { "epoch": 0.6782822949665611, "grad_norm": 1.5022225379943848, "learning_rate": 3.46871546905288e-06, "loss": 0.6318, "step": 9635 }, { "epoch": 0.6783526927138331, "grad_norm": 1.7800302505493164, "learning_rate": 3.4673374355671777e-06, "loss": 0.5815, "step": 9636 }, { "epoch": 0.6784230904611053, "grad_norm": 1.7448887825012207, "learning_rate": 3.4659595857477986e-06, "loss": 0.6312, "step": 9637 }, { "epoch": 0.6784934882083773, "grad_norm": 1.8354780673980713, "learning_rate": 3.464581919666385e-06, "loss": 0.6808, "step": 9638 }, { "epoch": 0.6785638859556494, "grad_norm": 1.9327635765075684, "learning_rate": 3.4632044373945577e-06, "loss": 0.6992, "step": 9639 }, { "epoch": 0.6786342837029216, "grad_norm": 1.868553638458252, "learning_rate": 3.461827139003936e-06, "loss": 0.6465, "step": 9640 }, { "epoch": 0.6787046814501936, "grad_norm": 1.6464048624038696, "learning_rate": 3.460450024566123e-06, "loss": 0.6273, "step": 9641 }, { "epoch": 0.6787750791974657, "grad_norm": 1.9481877088546753, "learning_rate": 3.459073094152721e-06, "loss": 0.7709, "step": 9642 }, { "epoch": 0.6788454769447377, "grad_norm": 1.9702844619750977, "learning_rate": 3.4576963478353133e-06, "loss": 0.673, "step": 9643 }, { "epoch": 0.6789158746920099, "grad_norm": 1.7631657123565674, "learning_rate": 3.456319785685483e-06, "loss": 0.7266, "step": 9644 }, { "epoch": 0.6789862724392819, "grad_norm": 1.9265705347061157, "learning_rate": 3.4549434077747965e-06, "loss": 0.5818, "step": 9645 }, { "epoch": 0.679056670186554, "grad_norm": 1.854010820388794, "learning_rate": 3.4535672141748133e-06, "loss": 0.6411, "step": 9646 }, { "epoch": 0.6791270679338262, "grad_norm": 2.146291494369507, "learning_rate": 3.4521912049570795e-06, "loss": 0.681, "step": 9647 }, { "epoch": 0.6791974656810982, "grad_norm": 1.9511109590530396, "learning_rate": 3.4508153801931404e-06, "loss": 0.7641, "step": 9648 }, { "epoch": 0.6792678634283703, "grad_norm": 1.939736008644104, "learning_rate": 3.4494397399545212e-06, "loss": 0.7771, "step": 9649 }, { "epoch": 0.6793382611756423, "grad_norm": 1.7828140258789062, "learning_rate": 3.448064284312749e-06, "loss": 0.6912, "step": 9650 }, { "epoch": 0.6794086589229145, "grad_norm": 1.7981231212615967, "learning_rate": 3.4466890133393305e-06, "loss": 0.7267, "step": 9651 }, { "epoch": 0.6794790566701866, "grad_norm": 1.8764609098434448, "learning_rate": 3.445313927105769e-06, "loss": 0.6176, "step": 9652 }, { "epoch": 0.6795494544174586, "grad_norm": 1.9188377857208252, "learning_rate": 3.443939025683553e-06, "loss": 0.7099, "step": 9653 }, { "epoch": 0.6796198521647308, "grad_norm": 1.6807130575180054, "learning_rate": 3.44256430914417e-06, "loss": 0.6797, "step": 9654 }, { "epoch": 0.6796902499120028, "grad_norm": 2.0973258018493652, "learning_rate": 3.441189777559088e-06, "loss": 0.7043, "step": 9655 }, { "epoch": 0.6797606476592749, "grad_norm": 1.950028657913208, "learning_rate": 3.4398154309997745e-06, "loss": 0.6808, "step": 9656 }, { "epoch": 0.679831045406547, "grad_norm": 1.8879201412200928, "learning_rate": 3.4384412695376792e-06, "loss": 0.656, "step": 9657 }, { "epoch": 0.6799014431538191, "grad_norm": 1.9734127521514893, "learning_rate": 3.437067293244253e-06, "loss": 0.598, "step": 9658 }, { "epoch": 0.6799718409010912, "grad_norm": 2.02650785446167, "learning_rate": 3.4356935021909195e-06, "loss": 0.6488, "step": 9659 }, { "epoch": 0.6800422386483632, "grad_norm": 2.0382115840911865, "learning_rate": 3.4343198964491117e-06, "loss": 0.6051, "step": 9660 }, { "epoch": 0.6801126363956354, "grad_norm": 1.742477297782898, "learning_rate": 3.432946476090239e-06, "loss": 0.7091, "step": 9661 }, { "epoch": 0.6801830341429074, "grad_norm": 2.718296766281128, "learning_rate": 3.431573241185712e-06, "loss": 0.6718, "step": 9662 }, { "epoch": 0.6802534318901795, "grad_norm": 2.184391975402832, "learning_rate": 3.430200191806921e-06, "loss": 0.6232, "step": 9663 }, { "epoch": 0.6803238296374516, "grad_norm": 2.223517894744873, "learning_rate": 3.4288273280252608e-06, "loss": 0.6614, "step": 9664 }, { "epoch": 0.6803942273847237, "grad_norm": 1.9078189134597778, "learning_rate": 3.427454649912097e-06, "loss": 0.6213, "step": 9665 }, { "epoch": 0.6804646251319958, "grad_norm": 1.8347957134246826, "learning_rate": 3.4260821575388037e-06, "loss": 0.7035, "step": 9666 }, { "epoch": 0.6805350228792678, "grad_norm": 1.7563501596450806, "learning_rate": 3.424709850976733e-06, "loss": 0.713, "step": 9667 }, { "epoch": 0.68060542062654, "grad_norm": 1.747071623802185, "learning_rate": 3.423337730297238e-06, "loss": 0.6592, "step": 9668 }, { "epoch": 0.6806758183738121, "grad_norm": 1.887338399887085, "learning_rate": 3.4219657955716512e-06, "loss": 0.6833, "step": 9669 }, { "epoch": 0.6807462161210841, "grad_norm": 3.8235697746276855, "learning_rate": 3.4205940468713078e-06, "loss": 0.7199, "step": 9670 }, { "epoch": 0.6808166138683562, "grad_norm": 1.665531873703003, "learning_rate": 3.4192224842675168e-06, "loss": 0.6162, "step": 9671 }, { "epoch": 0.6808870116156283, "grad_norm": 1.852760910987854, "learning_rate": 3.417851107831594e-06, "loss": 0.6304, "step": 9672 }, { "epoch": 0.6809574093629004, "grad_norm": 1.7917510271072388, "learning_rate": 3.416479917634834e-06, "loss": 0.6335, "step": 9673 }, { "epoch": 0.6810278071101725, "grad_norm": 1.6733452081680298, "learning_rate": 3.4151089137485317e-06, "loss": 0.6225, "step": 9674 }, { "epoch": 0.6810982048574445, "grad_norm": 2.00618314743042, "learning_rate": 3.41373809624396e-06, "loss": 0.6379, "step": 9675 }, { "epoch": 0.6811686026047167, "grad_norm": 2.0313611030578613, "learning_rate": 3.4123674651923986e-06, "loss": 0.6692, "step": 9676 }, { "epoch": 0.6812390003519887, "grad_norm": 1.804292917251587, "learning_rate": 3.410997020665096e-06, "loss": 0.6686, "step": 9677 }, { "epoch": 0.6813093980992608, "grad_norm": 2.040475845336914, "learning_rate": 3.409626762733312e-06, "loss": 0.7103, "step": 9678 }, { "epoch": 0.681379795846533, "grad_norm": 2.067575216293335, "learning_rate": 3.408256691468281e-06, "loss": 0.552, "step": 9679 }, { "epoch": 0.681450193593805, "grad_norm": 1.7989346981048584, "learning_rate": 3.4068868069412376e-06, "loss": 0.6633, "step": 9680 }, { "epoch": 0.6815205913410771, "grad_norm": 1.5478788614273071, "learning_rate": 3.4055171092234057e-06, "loss": 0.6202, "step": 9681 }, { "epoch": 0.6815909890883491, "grad_norm": 2.0661299228668213, "learning_rate": 3.4041475983859944e-06, "loss": 0.6726, "step": 9682 }, { "epoch": 0.6816613868356213, "grad_norm": 1.97454035282135, "learning_rate": 3.4027782745002073e-06, "loss": 0.6259, "step": 9683 }, { "epoch": 0.6817317845828933, "grad_norm": 1.8558063507080078, "learning_rate": 3.4014091376372315e-06, "loss": 0.7048, "step": 9684 }, { "epoch": 0.6818021823301654, "grad_norm": 2.4426748752593994, "learning_rate": 3.4000401878682573e-06, "loss": 0.7328, "step": 9685 }, { "epoch": 0.6818725800774376, "grad_norm": 1.9777158498764038, "learning_rate": 3.398671425264451e-06, "loss": 0.6249, "step": 9686 }, { "epoch": 0.6819429778247096, "grad_norm": 1.7155790328979492, "learning_rate": 3.3973028498969813e-06, "loss": 0.5337, "step": 9687 }, { "epoch": 0.6820133755719817, "grad_norm": 1.9081308841705322, "learning_rate": 3.3959344618369998e-06, "loss": 0.6658, "step": 9688 }, { "epoch": 0.6820837733192537, "grad_norm": 1.5579705238342285, "learning_rate": 3.3945662611556495e-06, "loss": 0.6374, "step": 9689 }, { "epoch": 0.6821541710665259, "grad_norm": 1.6553308963775635, "learning_rate": 3.3931982479240618e-06, "loss": 0.6524, "step": 9690 }, { "epoch": 0.682224568813798, "grad_norm": 1.8052480220794678, "learning_rate": 3.3918304222133666e-06, "loss": 0.7326, "step": 9691 }, { "epoch": 0.68229496656107, "grad_norm": 2.130686044692993, "learning_rate": 3.390462784094672e-06, "loss": 0.8323, "step": 9692 }, { "epoch": 0.6823653643083422, "grad_norm": 1.680199384689331, "learning_rate": 3.38909533363909e-06, "loss": 0.6153, "step": 9693 }, { "epoch": 0.6824357620556142, "grad_norm": 1.6699665784835815, "learning_rate": 3.3877280709177112e-06, "loss": 0.5972, "step": 9694 }, { "epoch": 0.6825061598028863, "grad_norm": 1.7271040678024292, "learning_rate": 3.386360996001622e-06, "loss": 0.6507, "step": 9695 }, { "epoch": 0.6825765575501584, "grad_norm": 1.6077675819396973, "learning_rate": 3.3849941089618936e-06, "loss": 0.6812, "step": 9696 }, { "epoch": 0.6826469552974305, "grad_norm": 1.8001341819763184, "learning_rate": 3.3836274098695986e-06, "loss": 0.6142, "step": 9697 }, { "epoch": 0.6827173530447026, "grad_norm": 1.6883219480514526, "learning_rate": 3.382260898795787e-06, "loss": 0.6374, "step": 9698 }, { "epoch": 0.6827877507919746, "grad_norm": 1.923975944519043, "learning_rate": 3.380894575811511e-06, "loss": 0.6915, "step": 9699 }, { "epoch": 0.6828581485392468, "grad_norm": 1.8200277090072632, "learning_rate": 3.3795284409878033e-06, "loss": 0.7372, "step": 9700 }, { "epoch": 0.6829285462865188, "grad_norm": 1.8282440900802612, "learning_rate": 3.378162494395691e-06, "loss": 0.6605, "step": 9701 }, { "epoch": 0.6829989440337909, "grad_norm": 1.8709651231765747, "learning_rate": 3.376796736106187e-06, "loss": 0.6722, "step": 9702 }, { "epoch": 0.683069341781063, "grad_norm": 1.872685432434082, "learning_rate": 3.375431166190306e-06, "loss": 0.6957, "step": 9703 }, { "epoch": 0.6831397395283351, "grad_norm": 1.8798104524612427, "learning_rate": 3.3740657847190382e-06, "loss": 0.7581, "step": 9704 }, { "epoch": 0.6832101372756072, "grad_norm": 1.7929614782333374, "learning_rate": 3.3727005917633766e-06, "loss": 0.6343, "step": 9705 }, { "epoch": 0.6832805350228792, "grad_norm": 1.720344066619873, "learning_rate": 3.3713355873942966e-06, "loss": 0.6756, "step": 9706 }, { "epoch": 0.6833509327701514, "grad_norm": 1.7152900695800781, "learning_rate": 3.3699707716827656e-06, "loss": 0.5958, "step": 9707 }, { "epoch": 0.6834213305174235, "grad_norm": 1.8341413736343384, "learning_rate": 3.368606144699739e-06, "loss": 0.6938, "step": 9708 }, { "epoch": 0.6834917282646955, "grad_norm": 1.6961334943771362, "learning_rate": 3.3672417065161705e-06, "loss": 0.7338, "step": 9709 }, { "epoch": 0.6835621260119676, "grad_norm": 1.6929547786712646, "learning_rate": 3.365877457202993e-06, "loss": 0.5902, "step": 9710 }, { "epoch": 0.6836325237592397, "grad_norm": 1.181296944618225, "learning_rate": 3.3645133968311405e-06, "loss": 0.752, "step": 9711 }, { "epoch": 0.6837029215065118, "grad_norm": 2.04714298248291, "learning_rate": 3.3631495254715284e-06, "loss": 0.76, "step": 9712 }, { "epoch": 0.6837733192537839, "grad_norm": 1.7080050706863403, "learning_rate": 3.3617858431950668e-06, "loss": 0.6038, "step": 9713 }, { "epoch": 0.683843717001056, "grad_norm": 1.5223158597946167, "learning_rate": 3.360422350072651e-06, "loss": 0.6787, "step": 9714 }, { "epoch": 0.6839141147483281, "grad_norm": 2.244065999984741, "learning_rate": 3.359059046175177e-06, "loss": 0.753, "step": 9715 }, { "epoch": 0.6839845124956001, "grad_norm": 1.5431822538375854, "learning_rate": 3.3576959315735173e-06, "loss": 0.7103, "step": 9716 }, { "epoch": 0.6840549102428722, "grad_norm": 1.6715564727783203, "learning_rate": 3.356333006338547e-06, "loss": 0.6944, "step": 9717 }, { "epoch": 0.6841253079901443, "grad_norm": 2.172600746154785, "learning_rate": 3.3549702705411245e-06, "loss": 0.7739, "step": 9718 }, { "epoch": 0.6841957057374164, "grad_norm": 1.5589364767074585, "learning_rate": 3.3536077242520986e-06, "loss": 0.6548, "step": 9719 }, { "epoch": 0.6842661034846885, "grad_norm": 1.4670262336730957, "learning_rate": 3.3522453675423063e-06, "loss": 0.634, "step": 9720 }, { "epoch": 0.6843365012319605, "grad_norm": 2.058462381362915, "learning_rate": 3.350883200482584e-06, "loss": 0.667, "step": 9721 }, { "epoch": 0.6844068989792327, "grad_norm": 2.2909421920776367, "learning_rate": 3.3495212231437464e-06, "loss": 0.7615, "step": 9722 }, { "epoch": 0.6844772967265047, "grad_norm": 2.250261068344116, "learning_rate": 3.348159435596609e-06, "loss": 0.6461, "step": 9723 }, { "epoch": 0.6845476944737768, "grad_norm": 1.6619738340377808, "learning_rate": 3.34679783791197e-06, "loss": 0.6921, "step": 9724 }, { "epoch": 0.684618092221049, "grad_norm": 3.1846871376037598, "learning_rate": 3.3454364301606195e-06, "loss": 0.6925, "step": 9725 }, { "epoch": 0.684688489968321, "grad_norm": 1.9113070964813232, "learning_rate": 3.344075212413336e-06, "loss": 0.6188, "step": 9726 }, { "epoch": 0.6847588877155931, "grad_norm": 1.9448421001434326, "learning_rate": 3.3427141847408963e-06, "loss": 0.6807, "step": 9727 }, { "epoch": 0.6848292854628651, "grad_norm": 1.1566511392593384, "learning_rate": 3.3413533472140556e-06, "loss": 0.8027, "step": 9728 }, { "epoch": 0.6848996832101373, "grad_norm": 2.151968002319336, "learning_rate": 3.339992699903571e-06, "loss": 0.7468, "step": 9729 }, { "epoch": 0.6849700809574094, "grad_norm": 1.6231049299240112, "learning_rate": 3.3386322428801803e-06, "loss": 0.5402, "step": 9730 }, { "epoch": 0.6850404787046814, "grad_norm": 1.7295827865600586, "learning_rate": 3.3372719762146117e-06, "loss": 0.6609, "step": 9731 }, { "epoch": 0.6851108764519536, "grad_norm": 1.8366844654083252, "learning_rate": 3.335911899977594e-06, "loss": 0.5769, "step": 9732 }, { "epoch": 0.6851812741992256, "grad_norm": 1.97739839553833, "learning_rate": 3.334552014239832e-06, "loss": 0.6351, "step": 9733 }, { "epoch": 0.6852516719464977, "grad_norm": 1.6563621759414673, "learning_rate": 3.333192319072033e-06, "loss": 0.7049, "step": 9734 }, { "epoch": 0.6853220696937699, "grad_norm": 1.561495304107666, "learning_rate": 3.3318328145448864e-06, "loss": 0.5648, "step": 9735 }, { "epoch": 0.6853924674410419, "grad_norm": 1.7893108129501343, "learning_rate": 3.330473500729073e-06, "loss": 0.5896, "step": 9736 }, { "epoch": 0.685462865188314, "grad_norm": 1.8764113187789917, "learning_rate": 3.3291143776952637e-06, "loss": 0.675, "step": 9737 }, { "epoch": 0.685533262935586, "grad_norm": 1.665452241897583, "learning_rate": 3.327755445514125e-06, "loss": 0.6265, "step": 9738 }, { "epoch": 0.6856036606828582, "grad_norm": 1.8162109851837158, "learning_rate": 3.3263967042563045e-06, "loss": 0.6354, "step": 9739 }, { "epoch": 0.6856740584301302, "grad_norm": 4.292539596557617, "learning_rate": 3.325038153992448e-06, "loss": 0.5504, "step": 9740 }, { "epoch": 0.6857444561774023, "grad_norm": 1.5601040124893188, "learning_rate": 3.323679794793186e-06, "loss": 0.5349, "step": 9741 }, { "epoch": 0.6858148539246745, "grad_norm": 1.967842936515808, "learning_rate": 3.322321626729142e-06, "loss": 0.6792, "step": 9742 }, { "epoch": 0.6858852516719465, "grad_norm": 1.7688243389129639, "learning_rate": 3.3209636498709237e-06, "loss": 0.7132, "step": 9743 }, { "epoch": 0.6859556494192186, "grad_norm": 2.313899517059326, "learning_rate": 3.3196058642891402e-06, "loss": 0.7338, "step": 9744 }, { "epoch": 0.6860260471664906, "grad_norm": 1.8735427856445312, "learning_rate": 3.3182482700543785e-06, "loss": 0.6521, "step": 9745 }, { "epoch": 0.6860964449137628, "grad_norm": 2.304257869720459, "learning_rate": 3.316890867237226e-06, "loss": 0.694, "step": 9746 }, { "epoch": 0.6861668426610349, "grad_norm": 1.8925052881240845, "learning_rate": 3.3155336559082527e-06, "loss": 0.7834, "step": 9747 }, { "epoch": 0.6862372404083069, "grad_norm": 2.2385096549987793, "learning_rate": 3.3141766361380222e-06, "loss": 0.6104, "step": 9748 }, { "epoch": 0.686307638155579, "grad_norm": 2.0116429328918457, "learning_rate": 3.3128198079970834e-06, "loss": 0.6901, "step": 9749 }, { "epoch": 0.6863780359028511, "grad_norm": 1.9165788888931274, "learning_rate": 3.311463171555985e-06, "loss": 0.6453, "step": 9750 }, { "epoch": 0.6864484336501232, "grad_norm": 1.9135514497756958, "learning_rate": 3.310106726885254e-06, "loss": 0.7627, "step": 9751 }, { "epoch": 0.6865188313973953, "grad_norm": 1.8727658987045288, "learning_rate": 3.308750474055419e-06, "loss": 0.6638, "step": 9752 }, { "epoch": 0.6865892291446674, "grad_norm": 1.8933016061782837, "learning_rate": 3.307394413136989e-06, "loss": 0.668, "step": 9753 }, { "epoch": 0.6866596268919395, "grad_norm": 2.046257734298706, "learning_rate": 3.3060385442004684e-06, "loss": 0.6432, "step": 9754 }, { "epoch": 0.6867300246392115, "grad_norm": 1.7414416074752808, "learning_rate": 3.304682867316347e-06, "loss": 0.6471, "step": 9755 }, { "epoch": 0.6868004223864836, "grad_norm": 1.7298564910888672, "learning_rate": 3.3033273825551117e-06, "loss": 0.6549, "step": 9756 }, { "epoch": 0.6868708201337557, "grad_norm": 1.6054898500442505, "learning_rate": 3.301972089987231e-06, "loss": 0.5841, "step": 9757 }, { "epoch": 0.6869412178810278, "grad_norm": 1.9043294191360474, "learning_rate": 3.300616989683174e-06, "loss": 0.6751, "step": 9758 }, { "epoch": 0.6870116156282999, "grad_norm": 1.7142916917800903, "learning_rate": 3.2992620817133897e-06, "loss": 0.5984, "step": 9759 }, { "epoch": 0.687082013375572, "grad_norm": 1.7733039855957031, "learning_rate": 3.2979073661483214e-06, "loss": 0.6521, "step": 9760 }, { "epoch": 0.6871524111228441, "grad_norm": 1.5951529741287231, "learning_rate": 3.2965528430583987e-06, "loss": 0.7375, "step": 9761 }, { "epoch": 0.6872228088701161, "grad_norm": 1.6974502801895142, "learning_rate": 3.2951985125140515e-06, "loss": 0.7624, "step": 9762 }, { "epoch": 0.6872932066173882, "grad_norm": 1.585476279258728, "learning_rate": 3.2938443745856855e-06, "loss": 0.6223, "step": 9763 }, { "epoch": 0.6873636043646604, "grad_norm": 2.4664852619171143, "learning_rate": 3.29249042934371e-06, "loss": 0.6241, "step": 9764 }, { "epoch": 0.6874340021119324, "grad_norm": 1.9692572355270386, "learning_rate": 3.2911366768585117e-06, "loss": 0.6327, "step": 9765 }, { "epoch": 0.6875043998592045, "grad_norm": 1.7356185913085938, "learning_rate": 3.289783117200482e-06, "loss": 0.6416, "step": 9766 }, { "epoch": 0.6875747976064766, "grad_norm": 1.6607965230941772, "learning_rate": 3.2884297504399826e-06, "loss": 0.6596, "step": 9767 }, { "epoch": 0.6876451953537487, "grad_norm": 1.9120712280273438, "learning_rate": 3.2870765766473847e-06, "loss": 0.7271, "step": 9768 }, { "epoch": 0.6877155931010208, "grad_norm": 1.7009334564208984, "learning_rate": 3.285723595893036e-06, "loss": 0.687, "step": 9769 }, { "epoch": 0.6877859908482928, "grad_norm": 1.5202540159225464, "learning_rate": 3.2843708082472835e-06, "loss": 0.7555, "step": 9770 }, { "epoch": 0.687856388595565, "grad_norm": 1.8177286386489868, "learning_rate": 3.283018213780456e-06, "loss": 0.6414, "step": 9771 }, { "epoch": 0.687926786342837, "grad_norm": 1.8193960189819336, "learning_rate": 3.281665812562882e-06, "loss": 0.6718, "step": 9772 }, { "epoch": 0.6879971840901091, "grad_norm": 1.6935453414916992, "learning_rate": 3.2803136046648658e-06, "loss": 0.6956, "step": 9773 }, { "epoch": 0.6880675818373813, "grad_norm": 1.5766830444335938, "learning_rate": 3.278961590156715e-06, "loss": 0.645, "step": 9774 }, { "epoch": 0.6881379795846533, "grad_norm": 1.8148837089538574, "learning_rate": 3.27760976910872e-06, "loss": 0.7029, "step": 9775 }, { "epoch": 0.6882083773319254, "grad_norm": 2.1576030254364014, "learning_rate": 3.2762581415911663e-06, "loss": 0.6368, "step": 9776 }, { "epoch": 0.6882787750791974, "grad_norm": 2.0971839427948, "learning_rate": 3.274906707674322e-06, "loss": 0.6791, "step": 9777 }, { "epoch": 0.6883491728264696, "grad_norm": 1.884069561958313, "learning_rate": 3.273555467428456e-06, "loss": 0.7073, "step": 9778 }, { "epoch": 0.6884195705737416, "grad_norm": 2.1183488368988037, "learning_rate": 3.272204420923812e-06, "loss": 0.7184, "step": 9779 }, { "epoch": 0.6884899683210137, "grad_norm": 1.8559417724609375, "learning_rate": 3.2708535682306352e-06, "loss": 0.5825, "step": 9780 }, { "epoch": 0.6885603660682859, "grad_norm": 1.8890330791473389, "learning_rate": 3.2695029094191624e-06, "loss": 0.6556, "step": 9781 }, { "epoch": 0.6886307638155579, "grad_norm": 2.424203872680664, "learning_rate": 3.268152444559609e-06, "loss": 0.683, "step": 9782 }, { "epoch": 0.68870116156283, "grad_norm": 1.6708738803863525, "learning_rate": 3.2668021737221912e-06, "loss": 0.738, "step": 9783 }, { "epoch": 0.688771559310102, "grad_norm": 2.5689682960510254, "learning_rate": 3.265452096977111e-06, "loss": 0.6378, "step": 9784 }, { "epoch": 0.6888419570573742, "grad_norm": 1.7556096315383911, "learning_rate": 3.2641022143945577e-06, "loss": 0.7864, "step": 9785 }, { "epoch": 0.6889123548046463, "grad_norm": 1.8933371305465698, "learning_rate": 3.262752526044711e-06, "loss": 0.6483, "step": 9786 }, { "epoch": 0.6889827525519183, "grad_norm": 1.9419466257095337, "learning_rate": 3.261403031997748e-06, "loss": 0.7188, "step": 9787 }, { "epoch": 0.6890531502991905, "grad_norm": 2.064037799835205, "learning_rate": 3.2600537323238243e-06, "loss": 0.7826, "step": 9788 }, { "epoch": 0.6891235480464625, "grad_norm": 1.825218677520752, "learning_rate": 3.2587046270930967e-06, "loss": 0.5555, "step": 9789 }, { "epoch": 0.6891939457937346, "grad_norm": 1.656275749206543, "learning_rate": 3.2573557163757047e-06, "loss": 0.6013, "step": 9790 }, { "epoch": 0.6892643435410067, "grad_norm": 1.6736310720443726, "learning_rate": 3.2560070002417777e-06, "loss": 0.5685, "step": 9791 }, { "epoch": 0.6893347412882788, "grad_norm": 1.653631567955017, "learning_rate": 3.2546584787614346e-06, "loss": 0.5948, "step": 9792 }, { "epoch": 0.6894051390355509, "grad_norm": 1.9809132814407349, "learning_rate": 3.2533101520047928e-06, "loss": 0.6493, "step": 9793 }, { "epoch": 0.6894755367828229, "grad_norm": 1.6101429462432861, "learning_rate": 3.2519620200419466e-06, "loss": 0.6792, "step": 9794 }, { "epoch": 0.689545934530095, "grad_norm": 1.874140739440918, "learning_rate": 3.2506140829429915e-06, "loss": 0.7416, "step": 9795 }, { "epoch": 0.6896163322773671, "grad_norm": 2.0044023990631104, "learning_rate": 3.2492663407780064e-06, "loss": 0.6419, "step": 9796 }, { "epoch": 0.6896867300246392, "grad_norm": 1.9635941982269287, "learning_rate": 3.2479187936170603e-06, "loss": 0.7253, "step": 9797 }, { "epoch": 0.6897571277719113, "grad_norm": 1.8705487251281738, "learning_rate": 3.246571441530212e-06, "loss": 0.6559, "step": 9798 }, { "epoch": 0.6898275255191834, "grad_norm": 2.071101665496826, "learning_rate": 3.2452242845875163e-06, "loss": 0.7379, "step": 9799 }, { "epoch": 0.6898979232664555, "grad_norm": 1.8363176584243774, "learning_rate": 3.243877322859009e-06, "loss": 0.6223, "step": 9800 }, { "epoch": 0.6899683210137275, "grad_norm": 2.0794527530670166, "learning_rate": 3.2425305564147223e-06, "loss": 0.6776, "step": 9801 }, { "epoch": 0.6900387187609996, "grad_norm": 1.7251089811325073, "learning_rate": 3.241183985324676e-06, "loss": 0.7132, "step": 9802 }, { "epoch": 0.6901091165082718, "grad_norm": 1.5502578020095825, "learning_rate": 3.2398376096588784e-06, "loss": 0.7546, "step": 9803 }, { "epoch": 0.6901795142555438, "grad_norm": 1.6666392087936401, "learning_rate": 3.2384914294873266e-06, "loss": 0.6857, "step": 9804 }, { "epoch": 0.6902499120028159, "grad_norm": 1.8543604612350464, "learning_rate": 3.237145444880014e-06, "loss": 0.6124, "step": 9805 }, { "epoch": 0.690320309750088, "grad_norm": 1.7732105255126953, "learning_rate": 3.2357996559069153e-06, "loss": 0.729, "step": 9806 }, { "epoch": 0.6903907074973601, "grad_norm": 1.7156692743301392, "learning_rate": 3.2344540626380036e-06, "loss": 0.705, "step": 9807 }, { "epoch": 0.6904611052446322, "grad_norm": 1.922350287437439, "learning_rate": 3.233108665143236e-06, "loss": 0.7081, "step": 9808 }, { "epoch": 0.6905315029919042, "grad_norm": 1.7373038530349731, "learning_rate": 3.23176346349256e-06, "loss": 0.6317, "step": 9809 }, { "epoch": 0.6906019007391764, "grad_norm": 2.240330219268799, "learning_rate": 3.230418457755912e-06, "loss": 0.7687, "step": 9810 }, { "epoch": 0.6906722984864484, "grad_norm": 1.7646106481552124, "learning_rate": 3.2290736480032253e-06, "loss": 0.7504, "step": 9811 }, { "epoch": 0.6907426962337205, "grad_norm": 1.9697329998016357, "learning_rate": 3.2277290343044115e-06, "loss": 0.705, "step": 9812 }, { "epoch": 0.6908130939809926, "grad_norm": 1.7590794563293457, "learning_rate": 3.2263846167293845e-06, "loss": 0.6152, "step": 9813 }, { "epoch": 0.6908834917282647, "grad_norm": 1.683505892753601, "learning_rate": 3.2250403953480384e-06, "loss": 0.6377, "step": 9814 }, { "epoch": 0.6909538894755368, "grad_norm": 1.7183583974838257, "learning_rate": 3.2236963702302616e-06, "loss": 0.561, "step": 9815 }, { "epoch": 0.6910242872228088, "grad_norm": 2.2967069149017334, "learning_rate": 3.2223525414459272e-06, "loss": 0.7747, "step": 9816 }, { "epoch": 0.691094684970081, "grad_norm": 1.6193082332611084, "learning_rate": 3.2210089090649083e-06, "loss": 0.8489, "step": 9817 }, { "epoch": 0.691165082717353, "grad_norm": 1.962459921836853, "learning_rate": 3.2196654731570567e-06, "loss": 0.7523, "step": 9818 }, { "epoch": 0.6912354804646251, "grad_norm": 1.6523611545562744, "learning_rate": 3.218322233792223e-06, "loss": 0.7792, "step": 9819 }, { "epoch": 0.6913058782118973, "grad_norm": 1.7787739038467407, "learning_rate": 3.2169791910402416e-06, "loss": 0.5944, "step": 9820 }, { "epoch": 0.6913762759591693, "grad_norm": 1.9278583526611328, "learning_rate": 3.215636344970937e-06, "loss": 0.5728, "step": 9821 }, { "epoch": 0.6914466737064414, "grad_norm": 1.86435866355896, "learning_rate": 3.2142936956541246e-06, "loss": 0.6688, "step": 9822 }, { "epoch": 0.6915170714537134, "grad_norm": 1.6299535036087036, "learning_rate": 3.2129512431596143e-06, "loss": 0.5897, "step": 9823 }, { "epoch": 0.6915874692009856, "grad_norm": 1.7736730575561523, "learning_rate": 3.2116089875571948e-06, "loss": 0.7876, "step": 9824 }, { "epoch": 0.6916578669482577, "grad_norm": 1.7368407249450684, "learning_rate": 3.2102669289166585e-06, "loss": 0.7047, "step": 9825 }, { "epoch": 0.6917282646955297, "grad_norm": 1.8418902158737183, "learning_rate": 3.2089250673077765e-06, "loss": 0.6936, "step": 9826 }, { "epoch": 0.6917986624428019, "grad_norm": 1.717640995979309, "learning_rate": 3.2075834028003137e-06, "loss": 0.5509, "step": 9827 }, { "epoch": 0.6918690601900739, "grad_norm": 2.045048713684082, "learning_rate": 3.2062419354640225e-06, "loss": 0.6287, "step": 9828 }, { "epoch": 0.691939457937346, "grad_norm": 1.9776087999343872, "learning_rate": 3.2049006653686505e-06, "loss": 0.7094, "step": 9829 }, { "epoch": 0.6920098556846181, "grad_norm": 1.599056363105774, "learning_rate": 3.2035595925839277e-06, "loss": 0.6744, "step": 9830 }, { "epoch": 0.6920802534318902, "grad_norm": 1.7318631410598755, "learning_rate": 3.202218717179583e-06, "loss": 0.6283, "step": 9831 }, { "epoch": 0.6921506511791623, "grad_norm": 1.8172944784164429, "learning_rate": 3.2008780392253258e-06, "loss": 0.6566, "step": 9832 }, { "epoch": 0.6922210489264343, "grad_norm": 2.206665277481079, "learning_rate": 3.1995375587908574e-06, "loss": 0.706, "step": 9833 }, { "epoch": 0.6922914466737065, "grad_norm": 1.8388936519622803, "learning_rate": 3.1981972759458767e-06, "loss": 0.6891, "step": 9834 }, { "epoch": 0.6923618444209785, "grad_norm": 1.5803570747375488, "learning_rate": 3.1968571907600598e-06, "loss": 0.5307, "step": 9835 }, { "epoch": 0.6924322421682506, "grad_norm": 1.6953201293945312, "learning_rate": 3.195517303303084e-06, "loss": 0.7655, "step": 9836 }, { "epoch": 0.6925026399155227, "grad_norm": 1.8079746961593628, "learning_rate": 3.1941776136446097e-06, "loss": 0.6195, "step": 9837 }, { "epoch": 0.6925730376627948, "grad_norm": 1.8813693523406982, "learning_rate": 3.192838121854288e-06, "loss": 0.6651, "step": 9838 }, { "epoch": 0.6926434354100669, "grad_norm": 2.1017818450927734, "learning_rate": 3.191498828001757e-06, "loss": 0.536, "step": 9839 }, { "epoch": 0.6927138331573389, "grad_norm": 1.6424918174743652, "learning_rate": 3.1901597321566545e-06, "loss": 0.6644, "step": 9840 }, { "epoch": 0.692784230904611, "grad_norm": 2.1937224864959717, "learning_rate": 3.1888208343885953e-06, "loss": 0.7817, "step": 9841 }, { "epoch": 0.6928546286518832, "grad_norm": 1.7382913827896118, "learning_rate": 3.187482134767195e-06, "loss": 0.612, "step": 9842 }, { "epoch": 0.6929250263991552, "grad_norm": 2.104919910430908, "learning_rate": 3.1861436333620508e-06, "loss": 0.6955, "step": 9843 }, { "epoch": 0.6929954241464273, "grad_norm": 1.9142935276031494, "learning_rate": 3.184805330242754e-06, "loss": 0.6314, "step": 9844 }, { "epoch": 0.6930658218936994, "grad_norm": 1.8659745454788208, "learning_rate": 3.1834672254788804e-06, "loss": 0.6622, "step": 9845 }, { "epoch": 0.6931362196409715, "grad_norm": 1.925085425376892, "learning_rate": 3.1821293191400045e-06, "loss": 0.6898, "step": 9846 }, { "epoch": 0.6932066173882436, "grad_norm": 2.0238122940063477, "learning_rate": 3.1807916112956804e-06, "loss": 0.6385, "step": 9847 }, { "epoch": 0.6932770151355157, "grad_norm": 1.8964465856552124, "learning_rate": 3.1794541020154625e-06, "loss": 0.5925, "step": 9848 }, { "epoch": 0.6933474128827878, "grad_norm": 1.7700719833374023, "learning_rate": 3.178116791368885e-06, "loss": 0.6858, "step": 9849 }, { "epoch": 0.6934178106300598, "grad_norm": 1.5642682313919067, "learning_rate": 3.176779679425478e-06, "loss": 0.5829, "step": 9850 }, { "epoch": 0.6934882083773319, "grad_norm": 2.009078025817871, "learning_rate": 3.175442766254754e-06, "loss": 0.6342, "step": 9851 }, { "epoch": 0.693558606124604, "grad_norm": 4.457292556762695, "learning_rate": 3.1741060519262283e-06, "loss": 0.6855, "step": 9852 }, { "epoch": 0.6936290038718761, "grad_norm": 1.7879716157913208, "learning_rate": 3.1727695365093903e-06, "loss": 0.6995, "step": 9853 }, { "epoch": 0.6936994016191482, "grad_norm": 1.7567517757415771, "learning_rate": 3.1714332200737334e-06, "loss": 0.6989, "step": 9854 }, { "epoch": 0.6937697993664202, "grad_norm": 1.6670564413070679, "learning_rate": 3.1700971026887303e-06, "loss": 0.6836, "step": 9855 }, { "epoch": 0.6938401971136924, "grad_norm": 1.6746838092803955, "learning_rate": 3.1687611844238475e-06, "loss": 0.7551, "step": 9856 }, { "epoch": 0.6939105948609644, "grad_norm": 1.623213768005371, "learning_rate": 3.167425465348538e-06, "loss": 0.6041, "step": 9857 }, { "epoch": 0.6939809926082365, "grad_norm": 1.6682995557785034, "learning_rate": 3.1660899455322525e-06, "loss": 0.6757, "step": 9858 }, { "epoch": 0.6940513903555087, "grad_norm": 2.054215669631958, "learning_rate": 3.1647546250444195e-06, "loss": 0.7159, "step": 9859 }, { "epoch": 0.6941217881027807, "grad_norm": 1.6279382705688477, "learning_rate": 3.16341950395447e-06, "loss": 0.6111, "step": 9860 }, { "epoch": 0.6941921858500528, "grad_norm": 1.4929808378219604, "learning_rate": 3.1620845823318158e-06, "loss": 0.5964, "step": 9861 }, { "epoch": 0.6942625835973248, "grad_norm": 1.7469303607940674, "learning_rate": 3.1607498602458586e-06, "loss": 0.6287, "step": 9862 }, { "epoch": 0.694332981344597, "grad_norm": 2.0467331409454346, "learning_rate": 3.1594153377659916e-06, "loss": 0.6918, "step": 9863 }, { "epoch": 0.6944033790918691, "grad_norm": 1.7731214761734009, "learning_rate": 3.1580810149616016e-06, "loss": 0.7643, "step": 9864 }, { "epoch": 0.6944737768391411, "grad_norm": 1.7016927003860474, "learning_rate": 3.1567468919020564e-06, "loss": 0.6056, "step": 9865 }, { "epoch": 0.6945441745864133, "grad_norm": 1.5842317342758179, "learning_rate": 3.1554129686567245e-06, "loss": 0.6301, "step": 9866 }, { "epoch": 0.6946145723336853, "grad_norm": 1.8232433795928955, "learning_rate": 3.1540792452949534e-06, "loss": 0.7421, "step": 9867 }, { "epoch": 0.6946849700809574, "grad_norm": 1.8021427392959595, "learning_rate": 3.1527457218860855e-06, "loss": 0.6131, "step": 9868 }, { "epoch": 0.6947553678282294, "grad_norm": 1.9509978294372559, "learning_rate": 3.151412398499449e-06, "loss": 0.6919, "step": 9869 }, { "epoch": 0.6948257655755016, "grad_norm": 2.3056869506835938, "learning_rate": 3.15007927520437e-06, "loss": 0.592, "step": 9870 }, { "epoch": 0.6948961633227737, "grad_norm": 3.8052291870117188, "learning_rate": 3.1487463520701534e-06, "loss": 0.7171, "step": 9871 }, { "epoch": 0.6949665610700457, "grad_norm": 2.664735794067383, "learning_rate": 3.147413629166105e-06, "loss": 0.587, "step": 9872 }, { "epoch": 0.6950369588173179, "grad_norm": 2.32882022857666, "learning_rate": 3.146081106561509e-06, "loss": 0.7728, "step": 9873 }, { "epoch": 0.6951073565645899, "grad_norm": 2.0157485008239746, "learning_rate": 3.1447487843256504e-06, "loss": 0.6754, "step": 9874 }, { "epoch": 0.695177754311862, "grad_norm": 6.115815162658691, "learning_rate": 3.14341666252779e-06, "loss": 0.5622, "step": 9875 }, { "epoch": 0.6952481520591342, "grad_norm": 1.8709293603897095, "learning_rate": 3.1420847412371916e-06, "loss": 0.639, "step": 9876 }, { "epoch": 0.6953185498064062, "grad_norm": 3.946664571762085, "learning_rate": 3.1407530205230995e-06, "loss": 0.6322, "step": 9877 }, { "epoch": 0.6953889475536783, "grad_norm": 2.7696781158447266, "learning_rate": 3.1394215004547555e-06, "loss": 0.7086, "step": 9878 }, { "epoch": 0.6954593453009503, "grad_norm": 8.076419830322266, "learning_rate": 3.1380901811013817e-06, "loss": 0.726, "step": 9879 }, { "epoch": 0.6955297430482225, "grad_norm": 37.39459991455078, "learning_rate": 3.136759062532202e-06, "loss": 0.6528, "step": 9880 }, { "epoch": 0.6956001407954946, "grad_norm": 1.7688554525375366, "learning_rate": 3.1354281448164115e-06, "loss": 0.6524, "step": 9881 }, { "epoch": 0.6956705385427666, "grad_norm": 1.692724585533142, "learning_rate": 3.134097428023213e-06, "loss": 0.5844, "step": 9882 }, { "epoch": 0.6957409362900387, "grad_norm": 1.729780673980713, "learning_rate": 3.132766912221792e-06, "loss": 0.6179, "step": 9883 }, { "epoch": 0.6958113340373108, "grad_norm": 2.1611130237579346, "learning_rate": 3.1314365974813196e-06, "loss": 0.6504, "step": 9884 }, { "epoch": 0.6958817317845829, "grad_norm": 1.872864842414856, "learning_rate": 3.1301064838709644e-06, "loss": 0.5596, "step": 9885 }, { "epoch": 0.695952129531855, "grad_norm": 1.8586758375167847, "learning_rate": 3.1287765714598777e-06, "loss": 0.6792, "step": 9886 }, { "epoch": 0.6960225272791271, "grad_norm": 1.8439899682998657, "learning_rate": 3.127446860317203e-06, "loss": 0.7344, "step": 9887 }, { "epoch": 0.6960929250263992, "grad_norm": 1.8635179996490479, "learning_rate": 3.126117350512071e-06, "loss": 0.6504, "step": 9888 }, { "epoch": 0.6961633227736712, "grad_norm": 2.055717706680298, "learning_rate": 3.1247880421136085e-06, "loss": 0.7057, "step": 9889 }, { "epoch": 0.6962337205209433, "grad_norm": 1.7904536724090576, "learning_rate": 3.1234589351909234e-06, "loss": 0.6681, "step": 9890 }, { "epoch": 0.6963041182682154, "grad_norm": 1.7483241558074951, "learning_rate": 3.1221300298131213e-06, "loss": 0.6602, "step": 9891 }, { "epoch": 0.6963745160154875, "grad_norm": 1.506252408027649, "learning_rate": 3.1208013260492903e-06, "loss": 0.7405, "step": 9892 }, { "epoch": 0.6964449137627596, "grad_norm": 1.8096550703048706, "learning_rate": 3.119472823968512e-06, "loss": 0.6964, "step": 9893 }, { "epoch": 0.6965153115100317, "grad_norm": 1.7798280715942383, "learning_rate": 3.118144523639854e-06, "loss": 0.626, "step": 9894 }, { "epoch": 0.6965857092573038, "grad_norm": 1.7485551834106445, "learning_rate": 3.1168164251323795e-06, "loss": 0.7572, "step": 9895 }, { "epoch": 0.6966561070045758, "grad_norm": 1.6644424200057983, "learning_rate": 3.1154885285151336e-06, "loss": 0.6945, "step": 9896 }, { "epoch": 0.6967265047518479, "grad_norm": 1.778469443321228, "learning_rate": 3.1141608338571604e-06, "loss": 0.6262, "step": 9897 }, { "epoch": 0.6967969024991201, "grad_norm": 1.647702932357788, "learning_rate": 3.112833341227484e-06, "loss": 0.7133, "step": 9898 }, { "epoch": 0.6968673002463921, "grad_norm": 1.669845700263977, "learning_rate": 3.111506050695123e-06, "loss": 0.6301, "step": 9899 }, { "epoch": 0.6969376979936642, "grad_norm": 1.6794837713241577, "learning_rate": 3.1101789623290808e-06, "loss": 0.6021, "step": 9900 }, { "epoch": 0.6970080957409363, "grad_norm": 1.6988531351089478, "learning_rate": 3.10885207619836e-06, "loss": 0.5916, "step": 9901 }, { "epoch": 0.6970784934882084, "grad_norm": 2.176445484161377, "learning_rate": 3.1075253923719416e-06, "loss": 0.6666, "step": 9902 }, { "epoch": 0.6971488912354805, "grad_norm": 1.7917377948760986, "learning_rate": 3.106198910918806e-06, "loss": 0.6541, "step": 9903 }, { "epoch": 0.6972192889827525, "grad_norm": 1.8982311487197876, "learning_rate": 3.104872631907915e-06, "loss": 0.6418, "step": 9904 }, { "epoch": 0.6972896867300247, "grad_norm": 1.776572823524475, "learning_rate": 3.1035465554082233e-06, "loss": 0.5696, "step": 9905 }, { "epoch": 0.6973600844772967, "grad_norm": 1.4719231128692627, "learning_rate": 3.1022206814886724e-06, "loss": 0.6739, "step": 9906 }, { "epoch": 0.6974304822245688, "grad_norm": 1.6634774208068848, "learning_rate": 3.100895010218202e-06, "loss": 0.6156, "step": 9907 }, { "epoch": 0.6975008799718408, "grad_norm": 1.8805830478668213, "learning_rate": 3.099569541665728e-06, "loss": 0.6759, "step": 9908 }, { "epoch": 0.697571277719113, "grad_norm": 1.9529987573623657, "learning_rate": 3.098244275900168e-06, "loss": 0.6522, "step": 9909 }, { "epoch": 0.6976416754663851, "grad_norm": 1.4345203638076782, "learning_rate": 3.096919212990422e-06, "loss": 0.6092, "step": 9910 }, { "epoch": 0.6977120732136571, "grad_norm": 2.105231761932373, "learning_rate": 3.095594353005382e-06, "loss": 0.599, "step": 9911 }, { "epoch": 0.6977824709609293, "grad_norm": 1.7613534927368164, "learning_rate": 3.0942696960139235e-06, "loss": 0.8087, "step": 9912 }, { "epoch": 0.6978528687082013, "grad_norm": 1.977926254272461, "learning_rate": 3.092945242084924e-06, "loss": 0.6145, "step": 9913 }, { "epoch": 0.6979232664554734, "grad_norm": 1.7748931646347046, "learning_rate": 3.091620991287237e-06, "loss": 0.6962, "step": 9914 }, { "epoch": 0.6979936642027456, "grad_norm": 1.6308856010437012, "learning_rate": 3.0902969436897177e-06, "loss": 0.637, "step": 9915 }, { "epoch": 0.6980640619500176, "grad_norm": 1.8035162687301636, "learning_rate": 3.0889730993612e-06, "loss": 0.6599, "step": 9916 }, { "epoch": 0.6981344596972897, "grad_norm": 1.5533947944641113, "learning_rate": 3.0876494583705137e-06, "loss": 0.6893, "step": 9917 }, { "epoch": 0.6982048574445617, "grad_norm": 1.694244384765625, "learning_rate": 3.0863260207864726e-06, "loss": 0.6558, "step": 9918 }, { "epoch": 0.6982752551918339, "grad_norm": 2.0870532989501953, "learning_rate": 3.0850027866778888e-06, "loss": 0.6597, "step": 9919 }, { "epoch": 0.698345652939106, "grad_norm": 2.5110483169555664, "learning_rate": 3.083679756113553e-06, "loss": 0.6625, "step": 9920 }, { "epoch": 0.698416050686378, "grad_norm": 1.9592125415802002, "learning_rate": 3.0823569291622558e-06, "loss": 0.5697, "step": 9921 }, { "epoch": 0.6984864484336502, "grad_norm": 1.6685484647750854, "learning_rate": 3.08103430589277e-06, "loss": 0.6306, "step": 9922 }, { "epoch": 0.6985568461809222, "grad_norm": 1.6407891511917114, "learning_rate": 3.0797118863738605e-06, "loss": 0.6233, "step": 9923 }, { "epoch": 0.6986272439281943, "grad_norm": 1.6002750396728516, "learning_rate": 3.078389670674277e-06, "loss": 0.6019, "step": 9924 }, { "epoch": 0.6986976416754663, "grad_norm": 1.6346689462661743, "learning_rate": 3.077067658862769e-06, "loss": 0.6031, "step": 9925 }, { "epoch": 0.6987680394227385, "grad_norm": 1.77586829662323, "learning_rate": 3.0757458510080634e-06, "loss": 0.6305, "step": 9926 }, { "epoch": 0.6988384371700106, "grad_norm": 1.5849015712738037, "learning_rate": 3.074424247178888e-06, "loss": 0.6636, "step": 9927 }, { "epoch": 0.6989088349172826, "grad_norm": 1.782927393913269, "learning_rate": 3.073102847443951e-06, "loss": 0.5862, "step": 9928 }, { "epoch": 0.6989792326645548, "grad_norm": 1.6202399730682373, "learning_rate": 3.071781651871954e-06, "loss": 0.5503, "step": 9929 }, { "epoch": 0.6990496304118268, "grad_norm": 1.6907997131347656, "learning_rate": 3.0704606605315827e-06, "loss": 0.7258, "step": 9930 }, { "epoch": 0.6991200281590989, "grad_norm": 1.6329880952835083, "learning_rate": 3.069139873491522e-06, "loss": 0.6715, "step": 9931 }, { "epoch": 0.699190425906371, "grad_norm": 2.2886157035827637, "learning_rate": 3.0678192908204403e-06, "loss": 0.6835, "step": 9932 }, { "epoch": 0.6992608236536431, "grad_norm": 1.7134819030761719, "learning_rate": 3.0664989125869956e-06, "loss": 0.6838, "step": 9933 }, { "epoch": 0.6993312214009152, "grad_norm": 2.6969239711761475, "learning_rate": 3.0651787388598346e-06, "loss": 0.5845, "step": 9934 }, { "epoch": 0.6994016191481872, "grad_norm": 1.7654513120651245, "learning_rate": 3.063858769707593e-06, "loss": 0.6514, "step": 9935 }, { "epoch": 0.6994720168954593, "grad_norm": 1.5988155603408813, "learning_rate": 3.0625390051989005e-06, "loss": 0.7556, "step": 9936 }, { "epoch": 0.6995424146427315, "grad_norm": 1.7948905229568481, "learning_rate": 3.0612194454023683e-06, "loss": 0.7142, "step": 9937 }, { "epoch": 0.6996128123900035, "grad_norm": 1.8342785835266113, "learning_rate": 3.059900090386607e-06, "loss": 0.5125, "step": 9938 }, { "epoch": 0.6996832101372756, "grad_norm": 1.5799202919006348, "learning_rate": 3.0585809402202084e-06, "loss": 0.6409, "step": 9939 }, { "epoch": 0.6997536078845477, "grad_norm": 2.061084747314453, "learning_rate": 3.057261994971756e-06, "loss": 0.6223, "step": 9940 }, { "epoch": 0.6998240056318198, "grad_norm": 1.915801763534546, "learning_rate": 3.055943254709821e-06, "loss": 0.7688, "step": 9941 }, { "epoch": 0.6998944033790919, "grad_norm": 1.6499098539352417, "learning_rate": 3.054624719502971e-06, "loss": 0.6124, "step": 9942 }, { "epoch": 0.699964801126364, "grad_norm": 2.113302707672119, "learning_rate": 3.053306389419752e-06, "loss": 0.7435, "step": 9943 }, { "epoch": 0.7000351988736361, "grad_norm": 1.918148159980774, "learning_rate": 3.051988264528711e-06, "loss": 0.6815, "step": 9944 }, { "epoch": 0.7001055966209081, "grad_norm": 1.687516689300537, "learning_rate": 3.0506703448983753e-06, "loss": 0.6553, "step": 9945 }, { "epoch": 0.7001759943681802, "grad_norm": 1.7586661577224731, "learning_rate": 3.0493526305972653e-06, "loss": 0.6385, "step": 9946 }, { "epoch": 0.7002463921154523, "grad_norm": 1.7426607608795166, "learning_rate": 3.048035121693888e-06, "loss": 0.7736, "step": 9947 }, { "epoch": 0.7003167898627244, "grad_norm": 1.9727818965911865, "learning_rate": 3.0467178182567455e-06, "loss": 0.563, "step": 9948 }, { "epoch": 0.7003871876099965, "grad_norm": 1.7521144151687622, "learning_rate": 3.0454007203543217e-06, "loss": 0.7281, "step": 9949 }, { "epoch": 0.7004575853572685, "grad_norm": 1.9237799644470215, "learning_rate": 3.044083828055098e-06, "loss": 0.7325, "step": 9950 }, { "epoch": 0.7005279831045407, "grad_norm": 2.0014851093292236, "learning_rate": 3.042767141427539e-06, "loss": 0.7079, "step": 9951 }, { "epoch": 0.7005983808518127, "grad_norm": 1.8069252967834473, "learning_rate": 3.0414506605401014e-06, "loss": 0.6676, "step": 9952 }, { "epoch": 0.7006687785990848, "grad_norm": 1.7079813480377197, "learning_rate": 3.040134385461225e-06, "loss": 0.7015, "step": 9953 }, { "epoch": 0.700739176346357, "grad_norm": 1.5666134357452393, "learning_rate": 3.0388183162593513e-06, "loss": 0.5908, "step": 9954 }, { "epoch": 0.700809574093629, "grad_norm": 1.878873348236084, "learning_rate": 3.0375024530028986e-06, "loss": 0.7736, "step": 9955 }, { "epoch": 0.7008799718409011, "grad_norm": 2.369109869003296, "learning_rate": 3.0361867957602835e-06, "loss": 0.6442, "step": 9956 }, { "epoch": 0.7009503695881731, "grad_norm": 1.7840155363082886, "learning_rate": 3.0348713445999075e-06, "loss": 0.7165, "step": 9957 }, { "epoch": 0.7010207673354453, "grad_norm": 1.6194804906845093, "learning_rate": 3.0335560995901615e-06, "loss": 0.769, "step": 9958 }, { "epoch": 0.7010911650827174, "grad_norm": 2.1263160705566406, "learning_rate": 3.032241060799423e-06, "loss": 0.7061, "step": 9959 }, { "epoch": 0.7011615628299894, "grad_norm": 1.7201207876205444, "learning_rate": 3.030926228296068e-06, "loss": 0.7545, "step": 9960 }, { "epoch": 0.7012319605772616, "grad_norm": 1.9961334466934204, "learning_rate": 3.0296116021484503e-06, "loss": 0.7484, "step": 9961 }, { "epoch": 0.7013023583245336, "grad_norm": 1.7290037870407104, "learning_rate": 3.028297182424924e-06, "loss": 0.6662, "step": 9962 }, { "epoch": 0.7013727560718057, "grad_norm": 1.839990258216858, "learning_rate": 3.026982969193824e-06, "loss": 0.7829, "step": 9963 }, { "epoch": 0.7014431538190777, "grad_norm": 1.6594538688659668, "learning_rate": 3.0256689625234773e-06, "loss": 0.5201, "step": 9964 }, { "epoch": 0.7015135515663499, "grad_norm": 1.9447969198226929, "learning_rate": 3.0243551624821985e-06, "loss": 0.555, "step": 9965 }, { "epoch": 0.701583949313622, "grad_norm": 1.9169667959213257, "learning_rate": 3.023041569138298e-06, "loss": 0.7933, "step": 9966 }, { "epoch": 0.701654347060894, "grad_norm": 1.8591681718826294, "learning_rate": 3.021728182560065e-06, "loss": 0.7258, "step": 9967 }, { "epoch": 0.7017247448081662, "grad_norm": 1.6500874757766724, "learning_rate": 3.02041500281579e-06, "loss": 0.7696, "step": 9968 }, { "epoch": 0.7017951425554382, "grad_norm": 1.8397413492202759, "learning_rate": 3.0191020299737424e-06, "loss": 0.6896, "step": 9969 }, { "epoch": 0.7018655403027103, "grad_norm": 1.7320343255996704, "learning_rate": 3.017789264102186e-06, "loss": 0.5827, "step": 9970 }, { "epoch": 0.7019359380499824, "grad_norm": 1.9542497396469116, "learning_rate": 3.0164767052693695e-06, "loss": 0.8304, "step": 9971 }, { "epoch": 0.7020063357972545, "grad_norm": 1.961185336112976, "learning_rate": 3.01516435354354e-06, "loss": 0.6433, "step": 9972 }, { "epoch": 0.7020767335445266, "grad_norm": 1.7294867038726807, "learning_rate": 3.013852208992921e-06, "loss": 0.7757, "step": 9973 }, { "epoch": 0.7021471312917986, "grad_norm": 1.6946605443954468, "learning_rate": 3.012540271685739e-06, "loss": 0.6939, "step": 9974 }, { "epoch": 0.7022175290390708, "grad_norm": 1.840102195739746, "learning_rate": 3.011228541690199e-06, "loss": 0.7231, "step": 9975 }, { "epoch": 0.7022879267863429, "grad_norm": 1.9415065050125122, "learning_rate": 3.0099170190745e-06, "loss": 0.6195, "step": 9976 }, { "epoch": 0.7023583245336149, "grad_norm": 1.5610259771347046, "learning_rate": 3.0086057039068266e-06, "loss": 0.5266, "step": 9977 }, { "epoch": 0.702428722280887, "grad_norm": 1.7736183404922485, "learning_rate": 3.007294596255359e-06, "loss": 0.5888, "step": 9978 }, { "epoch": 0.7024991200281591, "grad_norm": 1.7453997135162354, "learning_rate": 3.0059836961882597e-06, "loss": 0.6833, "step": 9979 }, { "epoch": 0.7025695177754312, "grad_norm": 1.823598027229309, "learning_rate": 3.0046730037736877e-06, "loss": 0.627, "step": 9980 }, { "epoch": 0.7026399155227033, "grad_norm": 1.7351704835891724, "learning_rate": 3.0033625190797816e-06, "loss": 0.6981, "step": 9981 }, { "epoch": 0.7027103132699754, "grad_norm": 2.019728899002075, "learning_rate": 3.0020522421746804e-06, "loss": 0.603, "step": 9982 }, { "epoch": 0.7027807110172475, "grad_norm": 1.6089885234832764, "learning_rate": 3.000742173126504e-06, "loss": 0.6049, "step": 9983 }, { "epoch": 0.7028511087645195, "grad_norm": 1.894290566444397, "learning_rate": 2.999432312003361e-06, "loss": 0.6769, "step": 9984 }, { "epoch": 0.7029215065117916, "grad_norm": 1.6145617961883545, "learning_rate": 2.9981226588733587e-06, "loss": 0.6359, "step": 9985 }, { "epoch": 0.7029919042590637, "grad_norm": 1.755478024482727, "learning_rate": 2.9968132138045803e-06, "loss": 0.5813, "step": 9986 }, { "epoch": 0.7030623020063358, "grad_norm": 1.7813531160354614, "learning_rate": 2.995503976865111e-06, "loss": 0.7243, "step": 9987 }, { "epoch": 0.7031326997536079, "grad_norm": 1.5463329553604126, "learning_rate": 2.9941949481230175e-06, "loss": 0.5378, "step": 9988 }, { "epoch": 0.70320309750088, "grad_norm": 1.689437985420227, "learning_rate": 2.9928861276463555e-06, "loss": 0.6743, "step": 9989 }, { "epoch": 0.7032734952481521, "grad_norm": 1.5676058530807495, "learning_rate": 2.9915775155031708e-06, "loss": 0.6412, "step": 9990 }, { "epoch": 0.7033438929954241, "grad_norm": 2.1062145233154297, "learning_rate": 2.9902691117615036e-06, "loss": 0.6198, "step": 9991 }, { "epoch": 0.7034142907426962, "grad_norm": 2.0691866874694824, "learning_rate": 2.9889609164893743e-06, "loss": 0.7145, "step": 9992 }, { "epoch": 0.7034846884899684, "grad_norm": 1.8989055156707764, "learning_rate": 2.987652929754802e-06, "loss": 0.5851, "step": 9993 }, { "epoch": 0.7035550862372404, "grad_norm": 1.9078233242034912, "learning_rate": 2.986345151625788e-06, "loss": 0.6593, "step": 9994 }, { "epoch": 0.7036254839845125, "grad_norm": 1.7003302574157715, "learning_rate": 2.9850375821703243e-06, "loss": 0.7031, "step": 9995 }, { "epoch": 0.7036958817317845, "grad_norm": 2.9607975482940674, "learning_rate": 2.98373022145639e-06, "loss": 0.6085, "step": 9996 }, { "epoch": 0.7037662794790567, "grad_norm": 1.9153721332550049, "learning_rate": 2.982423069551962e-06, "loss": 0.6626, "step": 9997 }, { "epoch": 0.7038366772263288, "grad_norm": 1.7958123683929443, "learning_rate": 2.9811161265249944e-06, "loss": 0.6967, "step": 9998 }, { "epoch": 0.7039070749736008, "grad_norm": 1.985589623451233, "learning_rate": 2.9798093924434412e-06, "loss": 0.6212, "step": 9999 }, { "epoch": 0.703977472720873, "grad_norm": 1.648389220237732, "learning_rate": 2.978502867375239e-06, "loss": 0.6354, "step": 10000 }, { "epoch": 0.704047870468145, "grad_norm": 2.2563416957855225, "learning_rate": 2.977196551388314e-06, "loss": 0.7408, "step": 10001 }, { "epoch": 0.7041182682154171, "grad_norm": 2.125845193862915, "learning_rate": 2.975890444550582e-06, "loss": 0.5976, "step": 10002 }, { "epoch": 0.7041886659626891, "grad_norm": 1.8960342407226562, "learning_rate": 2.974584546929953e-06, "loss": 0.6833, "step": 10003 }, { "epoch": 0.7042590637099613, "grad_norm": 1.8986183404922485, "learning_rate": 2.9732788585943157e-06, "loss": 0.6496, "step": 10004 }, { "epoch": 0.7043294614572334, "grad_norm": 1.5489532947540283, "learning_rate": 2.9719733796115605e-06, "loss": 0.725, "step": 10005 }, { "epoch": 0.7043998592045054, "grad_norm": 1.7399208545684814, "learning_rate": 2.970668110049557e-06, "loss": 0.6495, "step": 10006 }, { "epoch": 0.7044702569517776, "grad_norm": 1.7441058158874512, "learning_rate": 2.9693630499761674e-06, "loss": 0.6594, "step": 10007 }, { "epoch": 0.7045406546990496, "grad_norm": 1.9479979276657104, "learning_rate": 2.9680581994592406e-06, "loss": 0.6068, "step": 10008 }, { "epoch": 0.7046110524463217, "grad_norm": 1.6649194955825806, "learning_rate": 2.9667535585666223e-06, "loss": 0.7722, "step": 10009 }, { "epoch": 0.7046814501935939, "grad_norm": 2.0316741466522217, "learning_rate": 2.965449127366137e-06, "loss": 0.7279, "step": 10010 }, { "epoch": 0.7047518479408659, "grad_norm": 1.4823980331420898, "learning_rate": 2.9641449059256074e-06, "loss": 0.6653, "step": 10011 }, { "epoch": 0.704822245688138, "grad_norm": 1.6513859033584595, "learning_rate": 2.9628408943128395e-06, "loss": 0.5652, "step": 10012 }, { "epoch": 0.70489264343541, "grad_norm": 1.825790286064148, "learning_rate": 2.96153709259563e-06, "loss": 0.5563, "step": 10013 }, { "epoch": 0.7049630411826822, "grad_norm": 2.1634082794189453, "learning_rate": 2.9602335008417623e-06, "loss": 0.6662, "step": 10014 }, { "epoch": 0.7050334389299543, "grad_norm": 2.0479843616485596, "learning_rate": 2.958930119119015e-06, "loss": 0.6805, "step": 10015 }, { "epoch": 0.7051038366772263, "grad_norm": 1.7242577075958252, "learning_rate": 2.957626947495149e-06, "loss": 0.6657, "step": 10016 }, { "epoch": 0.7051742344244984, "grad_norm": 1.8510100841522217, "learning_rate": 2.956323986037921e-06, "loss": 0.6794, "step": 10017 }, { "epoch": 0.7052446321717705, "grad_norm": 1.995133399963379, "learning_rate": 2.9550212348150712e-06, "loss": 0.6038, "step": 10018 }, { "epoch": 0.7053150299190426, "grad_norm": 1.90489661693573, "learning_rate": 2.953718693894331e-06, "loss": 0.6511, "step": 10019 }, { "epoch": 0.7053854276663146, "grad_norm": 1.4566690921783447, "learning_rate": 2.952416363343418e-06, "loss": 0.6184, "step": 10020 }, { "epoch": 0.7054558254135868, "grad_norm": 1.8268593549728394, "learning_rate": 2.951114243230046e-06, "loss": 0.5912, "step": 10021 }, { "epoch": 0.7055262231608589, "grad_norm": 1.544240117073059, "learning_rate": 2.9498123336219097e-06, "loss": 0.6227, "step": 10022 }, { "epoch": 0.7055966209081309, "grad_norm": 1.6924477815628052, "learning_rate": 2.9485106345867e-06, "loss": 0.6735, "step": 10023 }, { "epoch": 0.705667018655403, "grad_norm": 1.7307243347167969, "learning_rate": 2.9472091461920913e-06, "loss": 0.5756, "step": 10024 }, { "epoch": 0.7057374164026751, "grad_norm": 1.7634615898132324, "learning_rate": 2.945907868505751e-06, "loss": 0.7331, "step": 10025 }, { "epoch": 0.7058078141499472, "grad_norm": 1.8963921070098877, "learning_rate": 2.9446068015953284e-06, "loss": 0.5627, "step": 10026 }, { "epoch": 0.7058782118972193, "grad_norm": 1.8217390775680542, "learning_rate": 2.9433059455284737e-06, "loss": 0.7204, "step": 10027 }, { "epoch": 0.7059486096444914, "grad_norm": 1.8642678260803223, "learning_rate": 2.9420053003728145e-06, "loss": 0.726, "step": 10028 }, { "epoch": 0.7060190073917635, "grad_norm": 2.07973051071167, "learning_rate": 2.9407048661959773e-06, "loss": 0.6786, "step": 10029 }, { "epoch": 0.7060894051390355, "grad_norm": 2.2167539596557617, "learning_rate": 2.9394046430655703e-06, "loss": 0.6252, "step": 10030 }, { "epoch": 0.7061598028863076, "grad_norm": 1.703397512435913, "learning_rate": 2.938104631049194e-06, "loss": 0.5628, "step": 10031 }, { "epoch": 0.7062302006335798, "grad_norm": 2.9558167457580566, "learning_rate": 2.9368048302144335e-06, "loss": 0.7147, "step": 10032 }, { "epoch": 0.7063005983808518, "grad_norm": 1.7365111112594604, "learning_rate": 2.9355052406288695e-06, "loss": 0.6775, "step": 10033 }, { "epoch": 0.7063709961281239, "grad_norm": 1.7735717296600342, "learning_rate": 2.934205862360072e-06, "loss": 0.7053, "step": 10034 }, { "epoch": 0.706441393875396, "grad_norm": 1.928139090538025, "learning_rate": 2.932906695475594e-06, "loss": 0.7563, "step": 10035 }, { "epoch": 0.7065117916226681, "grad_norm": 1.7576345205307007, "learning_rate": 2.9316077400429807e-06, "loss": 0.7335, "step": 10036 }, { "epoch": 0.7065821893699402, "grad_norm": 1.7541346549987793, "learning_rate": 2.9303089961297626e-06, "loss": 0.6386, "step": 10037 }, { "epoch": 0.7066525871172122, "grad_norm": 1.8031818866729736, "learning_rate": 2.9290104638034677e-06, "loss": 0.6517, "step": 10038 }, { "epoch": 0.7067229848644844, "grad_norm": 2.138974189758301, "learning_rate": 2.927712143131603e-06, "loss": 0.6623, "step": 10039 }, { "epoch": 0.7067933826117564, "grad_norm": 1.6375983953475952, "learning_rate": 2.9264140341816754e-06, "loss": 0.5633, "step": 10040 }, { "epoch": 0.7068637803590285, "grad_norm": 2.053149461746216, "learning_rate": 2.9251161370211705e-06, "loss": 0.7817, "step": 10041 }, { "epoch": 0.7069341781063005, "grad_norm": 1.8916683197021484, "learning_rate": 2.923818451717569e-06, "loss": 0.6106, "step": 10042 }, { "epoch": 0.7070045758535727, "grad_norm": 1.6641654968261719, "learning_rate": 2.922520978338335e-06, "loss": 0.7052, "step": 10043 }, { "epoch": 0.7070749736008448, "grad_norm": 2.1250782012939453, "learning_rate": 2.9212237169509306e-06, "loss": 0.6432, "step": 10044 }, { "epoch": 0.7071453713481168, "grad_norm": 1.7730687856674194, "learning_rate": 2.9199266676227963e-06, "loss": 0.6636, "step": 10045 }, { "epoch": 0.707215769095389, "grad_norm": 1.9411559104919434, "learning_rate": 2.918629830421372e-06, "loss": 0.6501, "step": 10046 }, { "epoch": 0.707286166842661, "grad_norm": 1.634627103805542, "learning_rate": 2.91733320541408e-06, "loss": 0.6396, "step": 10047 }, { "epoch": 0.7073565645899331, "grad_norm": 2.273071050643921, "learning_rate": 2.916036792668332e-06, "loss": 0.6539, "step": 10048 }, { "epoch": 0.7074269623372053, "grad_norm": 1.532799482345581, "learning_rate": 2.9147405922515265e-06, "loss": 0.6132, "step": 10049 }, { "epoch": 0.7074973600844773, "grad_norm": 1.8561948537826538, "learning_rate": 2.91344460423106e-06, "loss": 0.5857, "step": 10050 }, { "epoch": 0.7075677578317494, "grad_norm": 1.6562203168869019, "learning_rate": 2.912148828674308e-06, "loss": 0.5901, "step": 10051 }, { "epoch": 0.7076381555790214, "grad_norm": 1.8164520263671875, "learning_rate": 2.9108532656486427e-06, "loss": 0.6051, "step": 10052 }, { "epoch": 0.7077085533262936, "grad_norm": 2.0443215370178223, "learning_rate": 2.9095579152214186e-06, "loss": 0.6902, "step": 10053 }, { "epoch": 0.7077789510735657, "grad_norm": 1.7646472454071045, "learning_rate": 2.9082627774599835e-06, "loss": 0.5907, "step": 10054 }, { "epoch": 0.7078493488208377, "grad_norm": 1.7659674882888794, "learning_rate": 2.90696785243167e-06, "loss": 0.7406, "step": 10055 }, { "epoch": 0.7079197465681099, "grad_norm": 1.6204605102539062, "learning_rate": 2.905673140203808e-06, "loss": 0.6348, "step": 10056 }, { "epoch": 0.7079901443153819, "grad_norm": 1.8817546367645264, "learning_rate": 2.9043786408437042e-06, "loss": 0.6891, "step": 10057 }, { "epoch": 0.708060542062654, "grad_norm": 1.962913990020752, "learning_rate": 2.903084354418666e-06, "loss": 0.7941, "step": 10058 }, { "epoch": 0.708130939809926, "grad_norm": 1.8554470539093018, "learning_rate": 2.901790280995983e-06, "loss": 0.706, "step": 10059 }, { "epoch": 0.7082013375571982, "grad_norm": 1.8062853813171387, "learning_rate": 2.9004964206429354e-06, "loss": 0.6888, "step": 10060 }, { "epoch": 0.7082717353044703, "grad_norm": 1.639888048171997, "learning_rate": 2.8992027734267885e-06, "loss": 0.5485, "step": 10061 }, { "epoch": 0.7083421330517423, "grad_norm": 1.9975818395614624, "learning_rate": 2.897909339414806e-06, "loss": 0.6366, "step": 10062 }, { "epoch": 0.7084125307990145, "grad_norm": 2.1892645359039307, "learning_rate": 2.8966161186742295e-06, "loss": 0.6088, "step": 10063 }, { "epoch": 0.7084829285462865, "grad_norm": 2.4466724395751953, "learning_rate": 2.8953231112723e-06, "loss": 0.7141, "step": 10064 }, { "epoch": 0.7085533262935586, "grad_norm": 2.4621660709381104, "learning_rate": 2.8940303172762394e-06, "loss": 0.6163, "step": 10065 }, { "epoch": 0.7086237240408307, "grad_norm": 1.685118317604065, "learning_rate": 2.8927377367532604e-06, "loss": 0.6521, "step": 10066 }, { "epoch": 0.7086941217881028, "grad_norm": 3.233722686767578, "learning_rate": 2.891445369770564e-06, "loss": 0.6756, "step": 10067 }, { "epoch": 0.7087645195353749, "grad_norm": 3.4760682582855225, "learning_rate": 2.8901532163953467e-06, "loss": 0.8055, "step": 10068 }, { "epoch": 0.7088349172826469, "grad_norm": 2.1165568828582764, "learning_rate": 2.8888612766947836e-06, "loss": 0.6775, "step": 10069 }, { "epoch": 0.708905315029919, "grad_norm": 2.0811569690704346, "learning_rate": 2.887569550736048e-06, "loss": 0.6411, "step": 10070 }, { "epoch": 0.7089757127771912, "grad_norm": 1.802912950515747, "learning_rate": 2.886278038586296e-06, "loss": 0.6361, "step": 10071 }, { "epoch": 0.7090461105244632, "grad_norm": 1.9324332475662231, "learning_rate": 2.884986740312674e-06, "loss": 0.7376, "step": 10072 }, { "epoch": 0.7091165082717353, "grad_norm": 1.5957167148590088, "learning_rate": 2.883695655982316e-06, "loss": 0.7293, "step": 10073 }, { "epoch": 0.7091869060190074, "grad_norm": 1.6647099256515503, "learning_rate": 2.8824047856623506e-06, "loss": 0.6568, "step": 10074 }, { "epoch": 0.7092573037662795, "grad_norm": 1.778939127922058, "learning_rate": 2.8811141294198884e-06, "loss": 0.6771, "step": 10075 }, { "epoch": 0.7093277015135515, "grad_norm": 1.8343428373336792, "learning_rate": 2.8798236873220343e-06, "loss": 0.7314, "step": 10076 }, { "epoch": 0.7093980992608236, "grad_norm": 1.8079493045806885, "learning_rate": 2.8785334594358785e-06, "loss": 0.639, "step": 10077 }, { "epoch": 0.7094684970080958, "grad_norm": 2.220045328140259, "learning_rate": 2.8772434458285e-06, "loss": 0.7172, "step": 10078 }, { "epoch": 0.7095388947553678, "grad_norm": 2.0146853923797607, "learning_rate": 2.8759536465669677e-06, "loss": 0.706, "step": 10079 }, { "epoch": 0.7096092925026399, "grad_norm": 1.6228104829788208, "learning_rate": 2.8746640617183413e-06, "loss": 0.6793, "step": 10080 }, { "epoch": 0.709679690249912, "grad_norm": 1.6667505502700806, "learning_rate": 2.8733746913496646e-06, "loss": 0.5433, "step": 10081 }, { "epoch": 0.7097500879971841, "grad_norm": 1.8715029954910278, "learning_rate": 2.8720855355279744e-06, "loss": 0.5804, "step": 10082 }, { "epoch": 0.7098204857444562, "grad_norm": 1.6813031435012817, "learning_rate": 2.870796594320299e-06, "loss": 0.5781, "step": 10083 }, { "epoch": 0.7098908834917282, "grad_norm": 1.8256782293319702, "learning_rate": 2.8695078677936475e-06, "loss": 0.6511, "step": 10084 }, { "epoch": 0.7099612812390004, "grad_norm": 2.199021577835083, "learning_rate": 2.8682193560150232e-06, "loss": 0.8136, "step": 10085 }, { "epoch": 0.7100316789862724, "grad_norm": 1.7925736904144287, "learning_rate": 2.8669310590514146e-06, "loss": 0.7102, "step": 10086 }, { "epoch": 0.7101020767335445, "grad_norm": 1.9456300735473633, "learning_rate": 2.8656429769698055e-06, "loss": 0.6162, "step": 10087 }, { "epoch": 0.7101724744808167, "grad_norm": 1.9773117303848267, "learning_rate": 2.8643551098371592e-06, "loss": 0.7027, "step": 10088 }, { "epoch": 0.7102428722280887, "grad_norm": 1.629971981048584, "learning_rate": 2.863067457720439e-06, "loss": 0.5062, "step": 10089 }, { "epoch": 0.7103132699753608, "grad_norm": 1.8264412879943848, "learning_rate": 2.861780020686588e-06, "loss": 0.598, "step": 10090 }, { "epoch": 0.7103836677226328, "grad_norm": 2.549961805343628, "learning_rate": 2.8604927988025424e-06, "loss": 0.6864, "step": 10091 }, { "epoch": 0.710454065469905, "grad_norm": 2.1344809532165527, "learning_rate": 2.859205792135222e-06, "loss": 0.6593, "step": 10092 }, { "epoch": 0.7105244632171771, "grad_norm": 1.7912046909332275, "learning_rate": 2.8579190007515453e-06, "loss": 0.8025, "step": 10093 }, { "epoch": 0.7105948609644491, "grad_norm": 1.9107897281646729, "learning_rate": 2.8566324247184086e-06, "loss": 0.6739, "step": 10094 }, { "epoch": 0.7106652587117213, "grad_norm": 1.7673077583312988, "learning_rate": 2.855346064102707e-06, "loss": 0.6631, "step": 10095 }, { "epoch": 0.7107356564589933, "grad_norm": 1.9438629150390625, "learning_rate": 2.8540599189713177e-06, "loss": 0.8047, "step": 10096 }, { "epoch": 0.7108060542062654, "grad_norm": 1.7098110914230347, "learning_rate": 2.8527739893911073e-06, "loss": 0.6286, "step": 10097 }, { "epoch": 0.7108764519535374, "grad_norm": 1.919644832611084, "learning_rate": 2.85148827542893e-06, "loss": 0.7395, "step": 10098 }, { "epoch": 0.7109468497008096, "grad_norm": 1.949416160583496, "learning_rate": 2.8502027771516376e-06, "loss": 0.6271, "step": 10099 }, { "epoch": 0.7110172474480817, "grad_norm": 1.812337875366211, "learning_rate": 2.8489174946260587e-06, "loss": 0.6271, "step": 10100 }, { "epoch": 0.7110876451953537, "grad_norm": 1.6099995374679565, "learning_rate": 2.8476324279190215e-06, "loss": 0.6552, "step": 10101 }, { "epoch": 0.7111580429426259, "grad_norm": 1.9353443384170532, "learning_rate": 2.846347577097335e-06, "loss": 0.6189, "step": 10102 }, { "epoch": 0.7112284406898979, "grad_norm": 2.3352084159851074, "learning_rate": 2.8450629422277986e-06, "loss": 0.7473, "step": 10103 }, { "epoch": 0.71129883843717, "grad_norm": 2.060573101043701, "learning_rate": 2.8437785233772015e-06, "loss": 0.7014, "step": 10104 }, { "epoch": 0.7113692361844421, "grad_norm": 1.7785701751708984, "learning_rate": 2.842494320612325e-06, "loss": 0.6469, "step": 10105 }, { "epoch": 0.7114396339317142, "grad_norm": 2.296353578567505, "learning_rate": 2.8412103339999314e-06, "loss": 0.686, "step": 10106 }, { "epoch": 0.7115100316789863, "grad_norm": 2.0027904510498047, "learning_rate": 2.839926563606782e-06, "loss": 0.6124, "step": 10107 }, { "epoch": 0.7115804294262583, "grad_norm": 1.580527663230896, "learning_rate": 2.8386430094996176e-06, "loss": 0.5401, "step": 10108 }, { "epoch": 0.7116508271735305, "grad_norm": 1.8760838508605957, "learning_rate": 2.8373596717451722e-06, "loss": 0.6664, "step": 10109 }, { "epoch": 0.7117212249208026, "grad_norm": 1.6811845302581787, "learning_rate": 2.836076550410165e-06, "loss": 0.6138, "step": 10110 }, { "epoch": 0.7117916226680746, "grad_norm": 1.8571984767913818, "learning_rate": 2.8347936455613115e-06, "loss": 0.641, "step": 10111 }, { "epoch": 0.7118620204153467, "grad_norm": 1.8106952905654907, "learning_rate": 2.8335109572653054e-06, "loss": 0.6504, "step": 10112 }, { "epoch": 0.7119324181626188, "grad_norm": 1.9275500774383545, "learning_rate": 2.832228485588841e-06, "loss": 0.6633, "step": 10113 }, { "epoch": 0.7120028159098909, "grad_norm": 2.163856029510498, "learning_rate": 2.8309462305985923e-06, "loss": 0.7343, "step": 10114 }, { "epoch": 0.7120732136571629, "grad_norm": 2.0078117847442627, "learning_rate": 2.8296641923612247e-06, "loss": 0.6078, "step": 10115 }, { "epoch": 0.712143611404435, "grad_norm": 1.735643982887268, "learning_rate": 2.8283823709433902e-06, "loss": 0.5463, "step": 10116 }, { "epoch": 0.7122140091517072, "grad_norm": 2.318547010421753, "learning_rate": 2.8271007664117364e-06, "loss": 0.5694, "step": 10117 }, { "epoch": 0.7122844068989792, "grad_norm": 2.0515449047088623, "learning_rate": 2.825819378832891e-06, "loss": 0.6846, "step": 10118 }, { "epoch": 0.7123548046462513, "grad_norm": 1.9481446743011475, "learning_rate": 2.824538208273479e-06, "loss": 0.724, "step": 10119 }, { "epoch": 0.7124252023935234, "grad_norm": 2.0624639987945557, "learning_rate": 2.8232572548001064e-06, "loss": 0.6229, "step": 10120 }, { "epoch": 0.7124956001407955, "grad_norm": 1.8170660734176636, "learning_rate": 2.821976518479372e-06, "loss": 0.5622, "step": 10121 }, { "epoch": 0.7125659978880676, "grad_norm": 1.6875512599945068, "learning_rate": 2.82069599937786e-06, "loss": 0.6366, "step": 10122 }, { "epoch": 0.7126363956353396, "grad_norm": 2.0657260417938232, "learning_rate": 2.819415697562151e-06, "loss": 0.8069, "step": 10123 }, { "epoch": 0.7127067933826118, "grad_norm": 2.7567646503448486, "learning_rate": 2.8181356130988027e-06, "loss": 0.6792, "step": 10124 }, { "epoch": 0.7127771911298838, "grad_norm": 1.7986947298049927, "learning_rate": 2.8168557460543743e-06, "loss": 0.601, "step": 10125 }, { "epoch": 0.7128475888771559, "grad_norm": 1.7955129146575928, "learning_rate": 2.815576096495403e-06, "loss": 0.5991, "step": 10126 }, { "epoch": 0.7129179866244281, "grad_norm": 1.7077510356903076, "learning_rate": 2.814296664488421e-06, "loss": 0.6954, "step": 10127 }, { "epoch": 0.7129883843717001, "grad_norm": 1.6956712007522583, "learning_rate": 2.813017450099944e-06, "loss": 0.6931, "step": 10128 }, { "epoch": 0.7130587821189722, "grad_norm": 2.1122684478759766, "learning_rate": 2.8117384533964836e-06, "loss": 0.6541, "step": 10129 }, { "epoch": 0.7131291798662442, "grad_norm": 1.8957139253616333, "learning_rate": 2.8104596744445323e-06, "loss": 0.5688, "step": 10130 }, { "epoch": 0.7131995776135164, "grad_norm": 1.5819088220596313, "learning_rate": 2.8091811133105783e-06, "loss": 0.6993, "step": 10131 }, { "epoch": 0.7132699753607884, "grad_norm": 1.7509477138519287, "learning_rate": 2.807902770061094e-06, "loss": 0.7124, "step": 10132 }, { "epoch": 0.7133403731080605, "grad_norm": 1.759712815284729, "learning_rate": 2.806624644762539e-06, "loss": 0.5163, "step": 10133 }, { "epoch": 0.7134107708553327, "grad_norm": 1.9700337648391724, "learning_rate": 2.805346737481369e-06, "loss": 0.633, "step": 10134 }, { "epoch": 0.7134811686026047, "grad_norm": 2.1291818618774414, "learning_rate": 2.804069048284019e-06, "loss": 0.5608, "step": 10135 }, { "epoch": 0.7135515663498768, "grad_norm": 2.283766984939575, "learning_rate": 2.8027915772369205e-06, "loss": 0.6756, "step": 10136 }, { "epoch": 0.7136219640971488, "grad_norm": 1.8617198467254639, "learning_rate": 2.8015143244064903e-06, "loss": 0.6289, "step": 10137 }, { "epoch": 0.713692361844421, "grad_norm": 1.898706316947937, "learning_rate": 2.800237289859132e-06, "loss": 0.7164, "step": 10138 }, { "epoch": 0.7137627595916931, "grad_norm": 1.9101887941360474, "learning_rate": 2.7989604736612383e-06, "loss": 0.6544, "step": 10139 }, { "epoch": 0.7138331573389651, "grad_norm": 1.7816282510757446, "learning_rate": 2.7976838758791965e-06, "loss": 0.6388, "step": 10140 }, { "epoch": 0.7139035550862373, "grad_norm": 1.6882436275482178, "learning_rate": 2.796407496579374e-06, "loss": 0.5628, "step": 10141 }, { "epoch": 0.7139739528335093, "grad_norm": 1.6930179595947266, "learning_rate": 2.7951313358281348e-06, "loss": 0.6171, "step": 10142 }, { "epoch": 0.7140443505807814, "grad_norm": 2.290767192840576, "learning_rate": 2.793855393691825e-06, "loss": 0.643, "step": 10143 }, { "epoch": 0.7141147483280536, "grad_norm": 1.7549229860305786, "learning_rate": 2.7925796702367833e-06, "loss": 0.575, "step": 10144 }, { "epoch": 0.7141851460753256, "grad_norm": 1.7177040576934814, "learning_rate": 2.7913041655293325e-06, "loss": 0.6066, "step": 10145 }, { "epoch": 0.7142555438225977, "grad_norm": 1.8345248699188232, "learning_rate": 2.7900288796357925e-06, "loss": 0.6592, "step": 10146 }, { "epoch": 0.7143259415698697, "grad_norm": 1.7991154193878174, "learning_rate": 2.788753812622461e-06, "loss": 0.6248, "step": 10147 }, { "epoch": 0.7143963393171419, "grad_norm": 1.8708922863006592, "learning_rate": 2.7874789645556353e-06, "loss": 0.5848, "step": 10148 }, { "epoch": 0.714466737064414, "grad_norm": 1.7442247867584229, "learning_rate": 2.7862043355015933e-06, "loss": 0.6606, "step": 10149 }, { "epoch": 0.714537134811686, "grad_norm": 1.9585180282592773, "learning_rate": 2.784929925526604e-06, "loss": 0.6494, "step": 10150 }, { "epoch": 0.7146075325589581, "grad_norm": 1.7395250797271729, "learning_rate": 2.783655734696923e-06, "loss": 0.728, "step": 10151 }, { "epoch": 0.7146779303062302, "grad_norm": 1.8813142776489258, "learning_rate": 2.782381763078801e-06, "loss": 0.6491, "step": 10152 }, { "epoch": 0.7147483280535023, "grad_norm": 1.9267650842666626, "learning_rate": 2.7811080107384683e-06, "loss": 0.6272, "step": 10153 }, { "epoch": 0.7148187258007743, "grad_norm": 1.8490040302276611, "learning_rate": 2.779834477742154e-06, "loss": 0.6379, "step": 10154 }, { "epoch": 0.7148891235480465, "grad_norm": 2.068742513656616, "learning_rate": 2.778561164156067e-06, "loss": 0.7681, "step": 10155 }, { "epoch": 0.7149595212953186, "grad_norm": 2.136749267578125, "learning_rate": 2.777288070046408e-06, "loss": 0.715, "step": 10156 }, { "epoch": 0.7150299190425906, "grad_norm": 2.166428327560425, "learning_rate": 2.7760151954793643e-06, "loss": 0.6737, "step": 10157 }, { "epoch": 0.7151003167898627, "grad_norm": 1.722038745880127, "learning_rate": 2.774742540521119e-06, "loss": 0.6162, "step": 10158 }, { "epoch": 0.7151707145371348, "grad_norm": 1.8224986791610718, "learning_rate": 2.773470105237832e-06, "loss": 0.7141, "step": 10159 }, { "epoch": 0.7152411122844069, "grad_norm": 1.4595375061035156, "learning_rate": 2.7721978896956654e-06, "loss": 0.7829, "step": 10160 }, { "epoch": 0.715311510031679, "grad_norm": 1.9099692106246948, "learning_rate": 2.7709258939607592e-06, "loss": 0.6553, "step": 10161 }, { "epoch": 0.715381907778951, "grad_norm": 2.506944179534912, "learning_rate": 2.7696541180992457e-06, "loss": 0.6073, "step": 10162 }, { "epoch": 0.7154523055262232, "grad_norm": 2.762363910675049, "learning_rate": 2.7683825621772434e-06, "loss": 0.5821, "step": 10163 }, { "epoch": 0.7155227032734952, "grad_norm": 1.8135781288146973, "learning_rate": 2.767111226260867e-06, "loss": 0.6855, "step": 10164 }, { "epoch": 0.7155931010207673, "grad_norm": 1.8333359956741333, "learning_rate": 2.765840110416208e-06, "loss": 0.6861, "step": 10165 }, { "epoch": 0.7156634987680395, "grad_norm": 1.8007326126098633, "learning_rate": 2.7645692147093597e-06, "loss": 0.6021, "step": 10166 }, { "epoch": 0.7157338965153115, "grad_norm": 2.036118268966675, "learning_rate": 2.7632985392063933e-06, "loss": 0.7119, "step": 10167 }, { "epoch": 0.7158042942625836, "grad_norm": 1.8891340494155884, "learning_rate": 2.762028083973373e-06, "loss": 0.6976, "step": 10168 }, { "epoch": 0.7158746920098557, "grad_norm": 1.8038214445114136, "learning_rate": 2.7607578490763484e-06, "loss": 0.5993, "step": 10169 }, { "epoch": 0.7159450897571278, "grad_norm": 1.9499624967575073, "learning_rate": 2.7594878345813646e-06, "loss": 0.7899, "step": 10170 }, { "epoch": 0.7160154875043998, "grad_norm": 1.7294812202453613, "learning_rate": 2.758218040554447e-06, "loss": 0.7171, "step": 10171 }, { "epoch": 0.7160858852516719, "grad_norm": 1.9732459783554077, "learning_rate": 2.756948467061617e-06, "loss": 0.6277, "step": 10172 }, { "epoch": 0.7161562829989441, "grad_norm": 1.9487568140029907, "learning_rate": 2.755679114168879e-06, "loss": 0.6338, "step": 10173 }, { "epoch": 0.7162266807462161, "grad_norm": 1.967463731765747, "learning_rate": 2.754409981942228e-06, "loss": 0.6492, "step": 10174 }, { "epoch": 0.7162970784934882, "grad_norm": 1.7988029718399048, "learning_rate": 2.753141070447644e-06, "loss": 0.6251, "step": 10175 }, { "epoch": 0.7163674762407602, "grad_norm": 2.8949670791625977, "learning_rate": 2.7518723797511045e-06, "loss": 0.7378, "step": 10176 }, { "epoch": 0.7164378739880324, "grad_norm": 1.7894285917282104, "learning_rate": 2.750603909918565e-06, "loss": 0.6524, "step": 10177 }, { "epoch": 0.7165082717353045, "grad_norm": 1.886279821395874, "learning_rate": 2.7493356610159794e-06, "loss": 0.5449, "step": 10178 }, { "epoch": 0.7165786694825765, "grad_norm": 1.9029539823532104, "learning_rate": 2.7480676331092823e-06, "loss": 0.6781, "step": 10179 }, { "epoch": 0.7166490672298487, "grad_norm": 1.6867389678955078, "learning_rate": 2.7467998262643998e-06, "loss": 0.5941, "step": 10180 }, { "epoch": 0.7167194649771207, "grad_norm": 1.7880326509475708, "learning_rate": 2.7455322405472433e-06, "loss": 0.6251, "step": 10181 }, { "epoch": 0.7167898627243928, "grad_norm": 1.9059619903564453, "learning_rate": 2.7442648760237218e-06, "loss": 0.6506, "step": 10182 }, { "epoch": 0.716860260471665, "grad_norm": 2.130054473876953, "learning_rate": 2.7429977327597204e-06, "loss": 0.6117, "step": 10183 }, { "epoch": 0.716930658218937, "grad_norm": 1.89435875415802, "learning_rate": 2.741730810821123e-06, "loss": 0.6853, "step": 10184 }, { "epoch": 0.7170010559662091, "grad_norm": 1.520882487297058, "learning_rate": 2.7404641102738016e-06, "loss": 0.6285, "step": 10185 }, { "epoch": 0.7170714537134811, "grad_norm": 1.888420581817627, "learning_rate": 2.739197631183604e-06, "loss": 0.6376, "step": 10186 }, { "epoch": 0.7171418514607533, "grad_norm": 2.105788469314575, "learning_rate": 2.737931373616383e-06, "loss": 0.7697, "step": 10187 }, { "epoch": 0.7172122492080254, "grad_norm": 1.9381864070892334, "learning_rate": 2.7366653376379674e-06, "loss": 0.7914, "step": 10188 }, { "epoch": 0.7172826469552974, "grad_norm": 1.735547661781311, "learning_rate": 2.7353995233141846e-06, "loss": 0.707, "step": 10189 }, { "epoch": 0.7173530447025696, "grad_norm": 1.9828996658325195, "learning_rate": 2.7341339307108405e-06, "loss": 0.6185, "step": 10190 }, { "epoch": 0.7174234424498416, "grad_norm": 1.6690255403518677, "learning_rate": 2.732868559893739e-06, "loss": 0.5694, "step": 10191 }, { "epoch": 0.7174938401971137, "grad_norm": 1.88585364818573, "learning_rate": 2.7316034109286654e-06, "loss": 0.6337, "step": 10192 }, { "epoch": 0.7175642379443857, "grad_norm": 6.304566860198975, "learning_rate": 2.7303384838813965e-06, "loss": 0.7107, "step": 10193 }, { "epoch": 0.7176346356916579, "grad_norm": 2.7297840118408203, "learning_rate": 2.7290737788176932e-06, "loss": 0.7114, "step": 10194 }, { "epoch": 0.71770503343893, "grad_norm": 1.7563896179199219, "learning_rate": 2.7278092958033158e-06, "loss": 0.6269, "step": 10195 }, { "epoch": 0.717775431186202, "grad_norm": 1.6814230680465698, "learning_rate": 2.7265450349039995e-06, "loss": 0.6818, "step": 10196 }, { "epoch": 0.7178458289334742, "grad_norm": 1.8521995544433594, "learning_rate": 2.725280996185479e-06, "loss": 0.678, "step": 10197 }, { "epoch": 0.7179162266807462, "grad_norm": 1.793517827987671, "learning_rate": 2.7240171797134714e-06, "loss": 0.7183, "step": 10198 }, { "epoch": 0.7179866244280183, "grad_norm": 2.1142678260803223, "learning_rate": 2.7227535855536827e-06, "loss": 0.6564, "step": 10199 }, { "epoch": 0.7180570221752904, "grad_norm": 1.6673475503921509, "learning_rate": 2.7214902137718068e-06, "loss": 0.5634, "step": 10200 }, { "epoch": 0.7181274199225625, "grad_norm": 1.6095200777053833, "learning_rate": 2.720227064433532e-06, "loss": 0.7166, "step": 10201 }, { "epoch": 0.7181978176698346, "grad_norm": 3.0073227882385254, "learning_rate": 2.7189641376045253e-06, "loss": 0.6747, "step": 10202 }, { "epoch": 0.7182682154171066, "grad_norm": 1.8546141386032104, "learning_rate": 2.717701433350453e-06, "loss": 0.5819, "step": 10203 }, { "epoch": 0.7183386131643787, "grad_norm": 1.7409852743148804, "learning_rate": 2.716438951736962e-06, "loss": 0.6604, "step": 10204 }, { "epoch": 0.7184090109116509, "grad_norm": 1.7670824527740479, "learning_rate": 2.7151766928296887e-06, "loss": 0.6701, "step": 10205 }, { "epoch": 0.7184794086589229, "grad_norm": 2.0035204887390137, "learning_rate": 2.713914656694257e-06, "loss": 0.6817, "step": 10206 }, { "epoch": 0.718549806406195, "grad_norm": 2.0534746646881104, "learning_rate": 2.7126528433962865e-06, "loss": 0.7145, "step": 10207 }, { "epoch": 0.7186202041534671, "grad_norm": 1.7503806352615356, "learning_rate": 2.7113912530013755e-06, "loss": 0.5233, "step": 10208 }, { "epoch": 0.7186906019007392, "grad_norm": 1.7722392082214355, "learning_rate": 2.7101298855751197e-06, "loss": 0.6268, "step": 10209 }, { "epoch": 0.7187609996480112, "grad_norm": 1.7966272830963135, "learning_rate": 2.708868741183096e-06, "loss": 0.6307, "step": 10210 }, { "epoch": 0.7188313973952833, "grad_norm": 1.5686246156692505, "learning_rate": 2.7076078198908726e-06, "loss": 0.6336, "step": 10211 }, { "epoch": 0.7189017951425555, "grad_norm": 1.8944320678710938, "learning_rate": 2.7063471217640034e-06, "loss": 0.6402, "step": 10212 }, { "epoch": 0.7189721928898275, "grad_norm": 2.3201475143432617, "learning_rate": 2.705086646868039e-06, "loss": 0.719, "step": 10213 }, { "epoch": 0.7190425906370996, "grad_norm": 1.87679123878479, "learning_rate": 2.7038263952685073e-06, "loss": 0.6491, "step": 10214 }, { "epoch": 0.7191129883843717, "grad_norm": 3.090827465057373, "learning_rate": 2.702566367030934e-06, "loss": 0.7278, "step": 10215 }, { "epoch": 0.7191833861316438, "grad_norm": 1.7493771314620972, "learning_rate": 2.7013065622208276e-06, "loss": 0.6229, "step": 10216 }, { "epoch": 0.7192537838789159, "grad_norm": 2.1330678462982178, "learning_rate": 2.7000469809036856e-06, "loss": 0.6557, "step": 10217 }, { "epoch": 0.7193241816261879, "grad_norm": 1.9239699840545654, "learning_rate": 2.6987876231449934e-06, "loss": 0.7583, "step": 10218 }, { "epoch": 0.7193945793734601, "grad_norm": 1.5765659809112549, "learning_rate": 2.6975284890102304e-06, "loss": 0.6048, "step": 10219 }, { "epoch": 0.7194649771207321, "grad_norm": 1.7230829000473022, "learning_rate": 2.6962695785648552e-06, "loss": 0.6635, "step": 10220 }, { "epoch": 0.7195353748680042, "grad_norm": 1.79970383644104, "learning_rate": 2.695010891874325e-06, "loss": 0.6228, "step": 10221 }, { "epoch": 0.7196057726152764, "grad_norm": 2.2706217765808105, "learning_rate": 2.6937524290040773e-06, "loss": 0.609, "step": 10222 }, { "epoch": 0.7196761703625484, "grad_norm": 1.6709871292114258, "learning_rate": 2.6924941900195406e-06, "loss": 0.6365, "step": 10223 }, { "epoch": 0.7197465681098205, "grad_norm": 2.157172679901123, "learning_rate": 2.69123617498613e-06, "loss": 0.7235, "step": 10224 }, { "epoch": 0.7198169658570925, "grad_norm": 2.3580710887908936, "learning_rate": 2.689978383969255e-06, "loss": 0.803, "step": 10225 }, { "epoch": 0.7198873636043647, "grad_norm": 2.494523763656616, "learning_rate": 2.6887208170343046e-06, "loss": 0.5329, "step": 10226 }, { "epoch": 0.7199577613516367, "grad_norm": 1.7783260345458984, "learning_rate": 2.687463474246666e-06, "loss": 0.6707, "step": 10227 }, { "epoch": 0.7200281590989088, "grad_norm": 1.5565259456634521, "learning_rate": 2.6862063556717074e-06, "loss": 0.4413, "step": 10228 }, { "epoch": 0.720098556846181, "grad_norm": 1.8819677829742432, "learning_rate": 2.6849494613747867e-06, "loss": 0.62, "step": 10229 }, { "epoch": 0.720168954593453, "grad_norm": 1.762249231338501, "learning_rate": 2.68369279142125e-06, "loss": 0.6135, "step": 10230 }, { "epoch": 0.7202393523407251, "grad_norm": 1.7999740839004517, "learning_rate": 2.682436345876436e-06, "loss": 0.7153, "step": 10231 }, { "epoch": 0.7203097500879971, "grad_norm": 1.810145378112793, "learning_rate": 2.6811801248056636e-06, "loss": 0.688, "step": 10232 }, { "epoch": 0.7203801478352693, "grad_norm": 1.8567789793014526, "learning_rate": 2.6799241282742504e-06, "loss": 0.6273, "step": 10233 }, { "epoch": 0.7204505455825414, "grad_norm": 2.286994695663452, "learning_rate": 2.6786683563474947e-06, "loss": 0.8266, "step": 10234 }, { "epoch": 0.7205209433298134, "grad_norm": 1.5341379642486572, "learning_rate": 2.6774128090906823e-06, "loss": 0.6507, "step": 10235 }, { "epoch": 0.7205913410770856, "grad_norm": 1.7324180603027344, "learning_rate": 2.6761574865690952e-06, "loss": 0.7482, "step": 10236 }, { "epoch": 0.7206617388243576, "grad_norm": 2.7136831283569336, "learning_rate": 2.6749023888479937e-06, "loss": 0.8124, "step": 10237 }, { "epoch": 0.7207321365716297, "grad_norm": 1.7971011400222778, "learning_rate": 2.6736475159926364e-06, "loss": 0.7422, "step": 10238 }, { "epoch": 0.7208025343189018, "grad_norm": 1.7183927297592163, "learning_rate": 2.672392868068263e-06, "loss": 0.5826, "step": 10239 }, { "epoch": 0.7208729320661739, "grad_norm": 1.7980307340621948, "learning_rate": 2.6711384451401036e-06, "loss": 0.686, "step": 10240 }, { "epoch": 0.720943329813446, "grad_norm": 1.9288212060928345, "learning_rate": 2.669884247273375e-06, "loss": 0.589, "step": 10241 }, { "epoch": 0.721013727560718, "grad_norm": 1.7248685359954834, "learning_rate": 2.668630274533288e-06, "loss": 0.5625, "step": 10242 }, { "epoch": 0.7210841253079902, "grad_norm": 1.786866545677185, "learning_rate": 2.6673765269850335e-06, "loss": 0.5995, "step": 10243 }, { "epoch": 0.7211545230552623, "grad_norm": 1.827768325805664, "learning_rate": 2.666123004693799e-06, "loss": 0.7099, "step": 10244 }, { "epoch": 0.7212249208025343, "grad_norm": 2.071207046508789, "learning_rate": 2.6648697077247553e-06, "loss": 0.6702, "step": 10245 }, { "epoch": 0.7212953185498064, "grad_norm": 1.7006618976593018, "learning_rate": 2.663616636143061e-06, "loss": 0.6742, "step": 10246 }, { "epoch": 0.7213657162970785, "grad_norm": 2.5495522022247314, "learning_rate": 2.6623637900138624e-06, "loss": 0.6941, "step": 10247 }, { "epoch": 0.7214361140443506, "grad_norm": 1.9685434103012085, "learning_rate": 2.6611111694023017e-06, "loss": 0.7168, "step": 10248 }, { "epoch": 0.7215065117916226, "grad_norm": 1.7188739776611328, "learning_rate": 2.6598587743734982e-06, "loss": 0.6672, "step": 10249 }, { "epoch": 0.7215769095388948, "grad_norm": 1.7173349857330322, "learning_rate": 2.6586066049925702e-06, "loss": 0.7975, "step": 10250 }, { "epoch": 0.7216473072861669, "grad_norm": 2.1525938510894775, "learning_rate": 2.6573546613246173e-06, "loss": 0.7062, "step": 10251 }, { "epoch": 0.7217177050334389, "grad_norm": 1.7926366329193115, "learning_rate": 2.6561029434347274e-06, "loss": 0.5119, "step": 10252 }, { "epoch": 0.721788102780711, "grad_norm": 3.288958787918091, "learning_rate": 2.654851451387978e-06, "loss": 0.607, "step": 10253 }, { "epoch": 0.7218585005279831, "grad_norm": 1.6256624460220337, "learning_rate": 2.653600185249439e-06, "loss": 0.6499, "step": 10254 }, { "epoch": 0.7219288982752552, "grad_norm": 1.9199604988098145, "learning_rate": 2.65234914508416e-06, "loss": 0.632, "step": 10255 }, { "epoch": 0.7219992960225273, "grad_norm": 2.004490613937378, "learning_rate": 2.6510983309571887e-06, "loss": 0.6607, "step": 10256 }, { "epoch": 0.7220696937697993, "grad_norm": 1.8888812065124512, "learning_rate": 2.6498477429335538e-06, "loss": 0.6721, "step": 10257 }, { "epoch": 0.7221400915170715, "grad_norm": 1.713533639907837, "learning_rate": 2.6485973810782744e-06, "loss": 0.5738, "step": 10258 }, { "epoch": 0.7222104892643435, "grad_norm": 1.935546636581421, "learning_rate": 2.647347245456356e-06, "loss": 0.6309, "step": 10259 }, { "epoch": 0.7222808870116156, "grad_norm": 2.2524161338806152, "learning_rate": 2.646097336132799e-06, "loss": 0.584, "step": 10260 }, { "epoch": 0.7223512847588878, "grad_norm": 2.253981351852417, "learning_rate": 2.644847653172582e-06, "loss": 0.6364, "step": 10261 }, { "epoch": 0.7224216825061598, "grad_norm": 1.843891978263855, "learning_rate": 2.643598196640682e-06, "loss": 0.6053, "step": 10262 }, { "epoch": 0.7224920802534319, "grad_norm": 1.9015884399414062, "learning_rate": 2.6423489666020567e-06, "loss": 0.7176, "step": 10263 }, { "epoch": 0.722562478000704, "grad_norm": 1.8214033842086792, "learning_rate": 2.6410999631216555e-06, "loss": 0.6129, "step": 10264 }, { "epoch": 0.7226328757479761, "grad_norm": 1.685738444328308, "learning_rate": 2.6398511862644124e-06, "loss": 0.6345, "step": 10265 }, { "epoch": 0.7227032734952481, "grad_norm": 2.2754504680633545, "learning_rate": 2.6386026360952576e-06, "loss": 0.6131, "step": 10266 }, { "epoch": 0.7227736712425202, "grad_norm": 1.6086658239364624, "learning_rate": 2.6373543126790996e-06, "loss": 0.6573, "step": 10267 }, { "epoch": 0.7228440689897924, "grad_norm": 1.9143977165222168, "learning_rate": 2.6361062160808435e-06, "loss": 0.5508, "step": 10268 }, { "epoch": 0.7229144667370644, "grad_norm": 1.8227888345718384, "learning_rate": 2.634858346365378e-06, "loss": 0.6279, "step": 10269 }, { "epoch": 0.7229848644843365, "grad_norm": 1.7899259328842163, "learning_rate": 2.63361070359758e-06, "loss": 0.6503, "step": 10270 }, { "epoch": 0.7230552622316085, "grad_norm": 2.3337302207946777, "learning_rate": 2.6323632878423136e-06, "loss": 0.6374, "step": 10271 }, { "epoch": 0.7231256599788807, "grad_norm": 1.6244181394577026, "learning_rate": 2.631116099164438e-06, "loss": 0.7065, "step": 10272 }, { "epoch": 0.7231960577261528, "grad_norm": 1.6365666389465332, "learning_rate": 2.6298691376287904e-06, "loss": 0.7407, "step": 10273 }, { "epoch": 0.7232664554734248, "grad_norm": 1.8988217115402222, "learning_rate": 2.6286224033002073e-06, "loss": 0.6427, "step": 10274 }, { "epoch": 0.723336853220697, "grad_norm": 2.2116246223449707, "learning_rate": 2.627375896243504e-06, "loss": 0.6394, "step": 10275 }, { "epoch": 0.723407250967969, "grad_norm": 1.5864765644073486, "learning_rate": 2.6261296165234875e-06, "loss": 0.5568, "step": 10276 }, { "epoch": 0.7234776487152411, "grad_norm": 1.6424018144607544, "learning_rate": 2.6248835642049516e-06, "loss": 0.6612, "step": 10277 }, { "epoch": 0.7235480464625133, "grad_norm": 1.9140745401382446, "learning_rate": 2.623637739352683e-06, "loss": 0.7272, "step": 10278 }, { "epoch": 0.7236184442097853, "grad_norm": 1.8125613927841187, "learning_rate": 2.6223921420314505e-06, "loss": 0.6461, "step": 10279 }, { "epoch": 0.7236888419570574, "grad_norm": 1.6389987468719482, "learning_rate": 2.6211467723060174e-06, "loss": 0.7059, "step": 10280 }, { "epoch": 0.7237592397043294, "grad_norm": 2.093770980834961, "learning_rate": 2.6199016302411286e-06, "loss": 0.7472, "step": 10281 }, { "epoch": 0.7238296374516016, "grad_norm": 1.736376166343689, "learning_rate": 2.618656715901521e-06, "loss": 0.5754, "step": 10282 }, { "epoch": 0.7239000351988736, "grad_norm": 1.7805185317993164, "learning_rate": 2.6174120293519177e-06, "loss": 0.6193, "step": 10283 }, { "epoch": 0.7239704329461457, "grad_norm": 1.9871174097061157, "learning_rate": 2.6161675706570307e-06, "loss": 0.6938, "step": 10284 }, { "epoch": 0.7240408306934178, "grad_norm": 1.8444913625717163, "learning_rate": 2.614923339881564e-06, "loss": 0.7146, "step": 10285 }, { "epoch": 0.7241112284406899, "grad_norm": 1.6647013425827026, "learning_rate": 2.6136793370902035e-06, "loss": 0.5519, "step": 10286 }, { "epoch": 0.724181626187962, "grad_norm": 1.727646827697754, "learning_rate": 2.6124355623476306e-06, "loss": 0.6795, "step": 10287 }, { "epoch": 0.724252023935234, "grad_norm": 1.8543248176574707, "learning_rate": 2.6111920157185017e-06, "loss": 0.693, "step": 10288 }, { "epoch": 0.7243224216825062, "grad_norm": 1.6542253494262695, "learning_rate": 2.609948697267476e-06, "loss": 0.5669, "step": 10289 }, { "epoch": 0.7243928194297783, "grad_norm": 1.7674121856689453, "learning_rate": 2.6087056070591926e-06, "loss": 0.6704, "step": 10290 }, { "epoch": 0.7244632171770503, "grad_norm": 1.8462090492248535, "learning_rate": 2.6074627451582832e-06, "loss": 0.6614, "step": 10291 }, { "epoch": 0.7245336149243224, "grad_norm": 2.2300493717193604, "learning_rate": 2.606220111629362e-06, "loss": 0.7759, "step": 10292 }, { "epoch": 0.7246040126715945, "grad_norm": 2.212153673171997, "learning_rate": 2.6049777065370408e-06, "loss": 0.6083, "step": 10293 }, { "epoch": 0.7246744104188666, "grad_norm": 1.8609983921051025, "learning_rate": 2.6037355299459043e-06, "loss": 0.6974, "step": 10294 }, { "epoch": 0.7247448081661387, "grad_norm": 2.1889898777008057, "learning_rate": 2.602493581920541e-06, "loss": 0.7044, "step": 10295 }, { "epoch": 0.7248152059134108, "grad_norm": 1.657961130142212, "learning_rate": 2.6012518625255175e-06, "loss": 0.7387, "step": 10296 }, { "epoch": 0.7248856036606829, "grad_norm": 1.877724051475525, "learning_rate": 2.6000103718253948e-06, "loss": 0.7066, "step": 10297 }, { "epoch": 0.7249560014079549, "grad_norm": 1.6603754758834839, "learning_rate": 2.5987691098847162e-06, "loss": 0.5464, "step": 10298 }, { "epoch": 0.725026399155227, "grad_norm": 2.0902798175811768, "learning_rate": 2.59752807676802e-06, "loss": 0.7234, "step": 10299 }, { "epoch": 0.7250967969024992, "grad_norm": 2.2791807651519775, "learning_rate": 2.5962872725398256e-06, "loss": 0.6683, "step": 10300 }, { "epoch": 0.7251671946497712, "grad_norm": 1.746997356414795, "learning_rate": 2.5950466972646445e-06, "loss": 0.55, "step": 10301 }, { "epoch": 0.7252375923970433, "grad_norm": 2.0980730056762695, "learning_rate": 2.5938063510069723e-06, "loss": 0.6214, "step": 10302 }, { "epoch": 0.7253079901443154, "grad_norm": 1.7883740663528442, "learning_rate": 2.5925662338313008e-06, "loss": 0.6064, "step": 10303 }, { "epoch": 0.7253783878915875, "grad_norm": 2.0667238235473633, "learning_rate": 2.5913263458021002e-06, "loss": 0.6808, "step": 10304 }, { "epoch": 0.7254487856388595, "grad_norm": 1.8157246112823486, "learning_rate": 2.5900866869838383e-06, "loss": 0.6501, "step": 10305 }, { "epoch": 0.7255191833861316, "grad_norm": 1.9227396249771118, "learning_rate": 2.588847257440963e-06, "loss": 0.5667, "step": 10306 }, { "epoch": 0.7255895811334038, "grad_norm": 2.1207094192504883, "learning_rate": 2.587608057237914e-06, "loss": 0.5921, "step": 10307 }, { "epoch": 0.7256599788806758, "grad_norm": 1.7970744371414185, "learning_rate": 2.5863690864391152e-06, "loss": 0.6811, "step": 10308 }, { "epoch": 0.7257303766279479, "grad_norm": 2.1011929512023926, "learning_rate": 2.585130345108988e-06, "loss": 0.6683, "step": 10309 }, { "epoch": 0.72580077437522, "grad_norm": 1.8601335287094116, "learning_rate": 2.5838918333119295e-06, "loss": 0.749, "step": 10310 }, { "epoch": 0.7258711721224921, "grad_norm": 1.9433733224868774, "learning_rate": 2.5826535511123365e-06, "loss": 0.6811, "step": 10311 }, { "epoch": 0.7259415698697642, "grad_norm": 2.216212034225464, "learning_rate": 2.581415498574587e-06, "loss": 0.6486, "step": 10312 }, { "epoch": 0.7260119676170362, "grad_norm": 1.6425012350082397, "learning_rate": 2.580177675763046e-06, "loss": 0.6094, "step": 10313 }, { "epoch": 0.7260823653643084, "grad_norm": 3.317004442214966, "learning_rate": 2.578940082742069e-06, "loss": 0.6949, "step": 10314 }, { "epoch": 0.7261527631115804, "grad_norm": 1.6537209749221802, "learning_rate": 2.577702719576003e-06, "loss": 0.7909, "step": 10315 }, { "epoch": 0.7262231608588525, "grad_norm": 2.0623319149017334, "learning_rate": 2.5764655863291753e-06, "loss": 0.6966, "step": 10316 }, { "epoch": 0.7262935586061247, "grad_norm": 1.8167251348495483, "learning_rate": 2.5752286830659105e-06, "loss": 0.6469, "step": 10317 }, { "epoch": 0.7263639563533967, "grad_norm": 1.9063055515289307, "learning_rate": 2.5739920098505134e-06, "loss": 0.6378, "step": 10318 }, { "epoch": 0.7264343541006688, "grad_norm": 2.0043206214904785, "learning_rate": 2.5727555667472806e-06, "loss": 0.6809, "step": 10319 }, { "epoch": 0.7265047518479408, "grad_norm": 1.867244839668274, "learning_rate": 2.5715193538204934e-06, "loss": 0.803, "step": 10320 }, { "epoch": 0.726575149595213, "grad_norm": 1.8845382928848267, "learning_rate": 2.570283371134427e-06, "loss": 0.6293, "step": 10321 }, { "epoch": 0.726645547342485, "grad_norm": 1.6482939720153809, "learning_rate": 2.569047618753339e-06, "loss": 0.8176, "step": 10322 }, { "epoch": 0.7267159450897571, "grad_norm": 1.7835850715637207, "learning_rate": 2.5678120967414796e-06, "loss": 0.706, "step": 10323 }, { "epoch": 0.7267863428370293, "grad_norm": 1.917510747909546, "learning_rate": 2.5665768051630832e-06, "loss": 0.6406, "step": 10324 }, { "epoch": 0.7268567405843013, "grad_norm": 1.6719331741333008, "learning_rate": 2.5653417440823736e-06, "loss": 0.642, "step": 10325 }, { "epoch": 0.7269271383315734, "grad_norm": 1.8751591444015503, "learning_rate": 2.5641069135635612e-06, "loss": 0.6648, "step": 10326 }, { "epoch": 0.7269975360788454, "grad_norm": 1.7475430965423584, "learning_rate": 2.56287231367085e-06, "loss": 0.6681, "step": 10327 }, { "epoch": 0.7270679338261176, "grad_norm": 1.7640563249588013, "learning_rate": 2.561637944468423e-06, "loss": 0.6682, "step": 10328 }, { "epoch": 0.7271383315733897, "grad_norm": 2.068821668624878, "learning_rate": 2.560403806020462e-06, "loss": 0.6275, "step": 10329 }, { "epoch": 0.7272087293206617, "grad_norm": 1.6196540594100952, "learning_rate": 2.559169898391127e-06, "loss": 0.5859, "step": 10330 }, { "epoch": 0.7272791270679339, "grad_norm": 1.673447847366333, "learning_rate": 2.5579362216445723e-06, "loss": 0.6413, "step": 10331 }, { "epoch": 0.7273495248152059, "grad_norm": 1.9744035005569458, "learning_rate": 2.5567027758449323e-06, "loss": 0.6467, "step": 10332 }, { "epoch": 0.727419922562478, "grad_norm": 2.404219627380371, "learning_rate": 2.5554695610563425e-06, "loss": 0.7162, "step": 10333 }, { "epoch": 0.7274903203097501, "grad_norm": 1.932910680770874, "learning_rate": 2.5542365773429125e-06, "loss": 0.6499, "step": 10334 }, { "epoch": 0.7275607180570222, "grad_norm": 1.9428976774215698, "learning_rate": 2.5530038247687517e-06, "loss": 0.6158, "step": 10335 }, { "epoch": 0.7276311158042943, "grad_norm": 1.9125943183898926, "learning_rate": 2.5517713033979493e-06, "loss": 0.5824, "step": 10336 }, { "epoch": 0.7277015135515663, "grad_norm": 1.8740086555480957, "learning_rate": 2.5505390132945836e-06, "loss": 0.7222, "step": 10337 }, { "epoch": 0.7277719112988384, "grad_norm": 1.9459059238433838, "learning_rate": 2.5493069545227264e-06, "loss": 0.7816, "step": 10338 }, { "epoch": 0.7278423090461105, "grad_norm": 1.9712246656417847, "learning_rate": 2.5480751271464286e-06, "loss": 0.6105, "step": 10339 }, { "epoch": 0.7279127067933826, "grad_norm": 1.8439620733261108, "learning_rate": 2.54684353122974e-06, "loss": 0.6835, "step": 10340 }, { "epoch": 0.7279831045406547, "grad_norm": 1.9995825290679932, "learning_rate": 2.5456121668366883e-06, "loss": 0.6597, "step": 10341 }, { "epoch": 0.7280535022879268, "grad_norm": 1.5900872945785522, "learning_rate": 2.5443810340312947e-06, "loss": 0.6195, "step": 10342 }, { "epoch": 0.7281239000351989, "grad_norm": 2.124976873397827, "learning_rate": 2.5431501328775643e-06, "loss": 0.7431, "step": 10343 }, { "epoch": 0.7281942977824709, "grad_norm": 1.9709233045578003, "learning_rate": 2.5419194634394963e-06, "loss": 0.6714, "step": 10344 }, { "epoch": 0.728264695529743, "grad_norm": 1.6129564046859741, "learning_rate": 2.540689025781072e-06, "loss": 0.7199, "step": 10345 }, { "epoch": 0.7283350932770152, "grad_norm": 2.001476526260376, "learning_rate": 2.539458819966265e-06, "loss": 0.7043, "step": 10346 }, { "epoch": 0.7284054910242872, "grad_norm": 1.6625242233276367, "learning_rate": 2.538228846059034e-06, "loss": 0.6117, "step": 10347 }, { "epoch": 0.7284758887715593, "grad_norm": 1.96446692943573, "learning_rate": 2.5369991041233256e-06, "loss": 0.7194, "step": 10348 }, { "epoch": 0.7285462865188314, "grad_norm": 1.7614984512329102, "learning_rate": 2.535769594223074e-06, "loss": 0.5842, "step": 10349 }, { "epoch": 0.7286166842661035, "grad_norm": 1.2903255224227905, "learning_rate": 2.5345403164222063e-06, "loss": 0.7633, "step": 10350 }, { "epoch": 0.7286870820133756, "grad_norm": 1.7091795206069946, "learning_rate": 2.53331127078463e-06, "loss": 0.5876, "step": 10351 }, { "epoch": 0.7287574797606476, "grad_norm": 1.5589216947555542, "learning_rate": 2.532082457374248e-06, "loss": 0.7494, "step": 10352 }, { "epoch": 0.7288278775079198, "grad_norm": 1.876129150390625, "learning_rate": 2.5308538762549452e-06, "loss": 0.6515, "step": 10353 }, { "epoch": 0.7288982752551918, "grad_norm": 2.050868034362793, "learning_rate": 2.5296255274905962e-06, "loss": 0.6091, "step": 10354 }, { "epoch": 0.7289686730024639, "grad_norm": 1.6494568586349487, "learning_rate": 2.528397411145063e-06, "loss": 0.6018, "step": 10355 }, { "epoch": 0.7290390707497361, "grad_norm": 2.221632242202759, "learning_rate": 2.5271695272822007e-06, "loss": 0.6874, "step": 10356 }, { "epoch": 0.7291094684970081, "grad_norm": 1.9048995971679688, "learning_rate": 2.5259418759658427e-06, "loss": 0.7693, "step": 10357 }, { "epoch": 0.7291798662442802, "grad_norm": 1.6914536952972412, "learning_rate": 2.524714457259822e-06, "loss": 0.5889, "step": 10358 }, { "epoch": 0.7292502639915522, "grad_norm": 2.0195040702819824, "learning_rate": 2.523487271227948e-06, "loss": 0.7628, "step": 10359 }, { "epoch": 0.7293206617388244, "grad_norm": 1.656240701675415, "learning_rate": 2.522260317934026e-06, "loss": 0.583, "step": 10360 }, { "epoch": 0.7293910594860964, "grad_norm": 1.9858083724975586, "learning_rate": 2.5210335974418428e-06, "loss": 0.6043, "step": 10361 }, { "epoch": 0.7294614572333685, "grad_norm": 1.6349362134933472, "learning_rate": 2.5198071098151822e-06, "loss": 0.6762, "step": 10362 }, { "epoch": 0.7295318549806407, "grad_norm": 2.0963664054870605, "learning_rate": 2.518580855117804e-06, "loss": 0.6568, "step": 10363 }, { "epoch": 0.7296022527279127, "grad_norm": 1.9063777923583984, "learning_rate": 2.5173548334134683e-06, "loss": 0.7014, "step": 10364 }, { "epoch": 0.7296726504751848, "grad_norm": 1.8628312349319458, "learning_rate": 2.516129044765915e-06, "loss": 0.7922, "step": 10365 }, { "epoch": 0.7297430482224568, "grad_norm": 1.8367992639541626, "learning_rate": 2.5149034892388736e-06, "loss": 0.5862, "step": 10366 }, { "epoch": 0.729813445969729, "grad_norm": 1.8020175695419312, "learning_rate": 2.5136781668960596e-06, "loss": 0.5537, "step": 10367 }, { "epoch": 0.7298838437170011, "grad_norm": 1.927307367324829, "learning_rate": 2.5124530778011822e-06, "loss": 0.6933, "step": 10368 }, { "epoch": 0.7299542414642731, "grad_norm": 1.8053714036941528, "learning_rate": 2.5112282220179316e-06, "loss": 0.7009, "step": 10369 }, { "epoch": 0.7300246392115453, "grad_norm": 1.829987645149231, "learning_rate": 2.510003599609993e-06, "loss": 0.6729, "step": 10370 }, { "epoch": 0.7300950369588173, "grad_norm": 1.6121257543563843, "learning_rate": 2.5087792106410343e-06, "loss": 0.6515, "step": 10371 }, { "epoch": 0.7301654347060894, "grad_norm": 1.6842812299728394, "learning_rate": 2.507555055174711e-06, "loss": 0.6181, "step": 10372 }, { "epoch": 0.7302358324533615, "grad_norm": 2.9443044662475586, "learning_rate": 2.5063311332746676e-06, "loss": 0.6233, "step": 10373 }, { "epoch": 0.7303062302006336, "grad_norm": 1.8835875988006592, "learning_rate": 2.50510744500454e-06, "loss": 0.6831, "step": 10374 }, { "epoch": 0.7303766279479057, "grad_norm": 1.9468082189559937, "learning_rate": 2.503883990427945e-06, "loss": 0.6425, "step": 10375 }, { "epoch": 0.7304470256951777, "grad_norm": 1.7566752433776855, "learning_rate": 2.502660769608496e-06, "loss": 0.5654, "step": 10376 }, { "epoch": 0.7305174234424499, "grad_norm": 2.1460485458374023, "learning_rate": 2.5014377826097866e-06, "loss": 0.6749, "step": 10377 }, { "epoch": 0.7305878211897219, "grad_norm": 2.020026683807373, "learning_rate": 2.5002150294954e-06, "loss": 0.7093, "step": 10378 }, { "epoch": 0.730658218936994, "grad_norm": 1.6902161836624146, "learning_rate": 2.4989925103289074e-06, "loss": 0.6391, "step": 10379 }, { "epoch": 0.7307286166842661, "grad_norm": 2.0284833908081055, "learning_rate": 2.497770225173873e-06, "loss": 0.6863, "step": 10380 }, { "epoch": 0.7307990144315382, "grad_norm": 2.023139476776123, "learning_rate": 2.49654817409384e-06, "loss": 0.6019, "step": 10381 }, { "epoch": 0.7308694121788103, "grad_norm": 1.5427888631820679, "learning_rate": 2.4953263571523485e-06, "loss": 0.5578, "step": 10382 }, { "epoch": 0.7309398099260823, "grad_norm": 1.526821494102478, "learning_rate": 2.4941047744129185e-06, "loss": 0.6563, "step": 10383 }, { "epoch": 0.7310102076733545, "grad_norm": 1.89927339553833, "learning_rate": 2.49288342593906e-06, "loss": 0.678, "step": 10384 }, { "epoch": 0.7310806054206266, "grad_norm": 3.1016879081726074, "learning_rate": 2.491662311794277e-06, "loss": 0.6631, "step": 10385 }, { "epoch": 0.7311510031678986, "grad_norm": 1.7901020050048828, "learning_rate": 2.490441432042051e-06, "loss": 0.643, "step": 10386 }, { "epoch": 0.7312214009151707, "grad_norm": 1.6844515800476074, "learning_rate": 2.4892207867458604e-06, "loss": 0.5677, "step": 10387 }, { "epoch": 0.7312917986624428, "grad_norm": 1.8172078132629395, "learning_rate": 2.4880003759691644e-06, "loss": 0.6054, "step": 10388 }, { "epoch": 0.7313621964097149, "grad_norm": 1.8839858770370483, "learning_rate": 2.4867801997754205e-06, "loss": 0.7593, "step": 10389 }, { "epoch": 0.731432594156987, "grad_norm": 1.858086347579956, "learning_rate": 2.4855602582280564e-06, "loss": 0.5796, "step": 10390 }, { "epoch": 0.731502991904259, "grad_norm": 1.7097035646438599, "learning_rate": 2.4843405513905054e-06, "loss": 0.6145, "step": 10391 }, { "epoch": 0.7315733896515312, "grad_norm": 1.6394836902618408, "learning_rate": 2.483121079326176e-06, "loss": 0.6661, "step": 10392 }, { "epoch": 0.7316437873988032, "grad_norm": 2.0659093856811523, "learning_rate": 2.481901842098475e-06, "loss": 0.6551, "step": 10393 }, { "epoch": 0.7317141851460753, "grad_norm": 2.311492681503296, "learning_rate": 2.4806828397707868e-06, "loss": 0.6077, "step": 10394 }, { "epoch": 0.7317845828933475, "grad_norm": 1.9859529733657837, "learning_rate": 2.4794640724064956e-06, "loss": 0.6624, "step": 10395 }, { "epoch": 0.7318549806406195, "grad_norm": 2.0605568885803223, "learning_rate": 2.478245540068956e-06, "loss": 0.7471, "step": 10396 }, { "epoch": 0.7319253783878916, "grad_norm": 1.6775743961334229, "learning_rate": 2.477027242821529e-06, "loss": 0.6633, "step": 10397 }, { "epoch": 0.7319957761351636, "grad_norm": 1.9092340469360352, "learning_rate": 2.475809180727549e-06, "loss": 0.6218, "step": 10398 }, { "epoch": 0.7320661738824358, "grad_norm": 1.6495814323425293, "learning_rate": 2.4745913538503494e-06, "loss": 0.596, "step": 10399 }, { "epoch": 0.7321365716297078, "grad_norm": 2.274129867553711, "learning_rate": 2.4733737622532415e-06, "loss": 0.6144, "step": 10400 }, { "epoch": 0.7322069693769799, "grad_norm": 1.8947898149490356, "learning_rate": 2.472156405999536e-06, "loss": 0.7046, "step": 10401 }, { "epoch": 0.7322773671242521, "grad_norm": 1.6649097204208374, "learning_rate": 2.4709392851525147e-06, "loss": 0.7474, "step": 10402 }, { "epoch": 0.7323477648715241, "grad_norm": 1.6019611358642578, "learning_rate": 2.469722399775464e-06, "loss": 0.6189, "step": 10403 }, { "epoch": 0.7324181626187962, "grad_norm": 1.6378843784332275, "learning_rate": 2.468505749931647e-06, "loss": 0.615, "step": 10404 }, { "epoch": 0.7324885603660682, "grad_norm": 1.772958517074585, "learning_rate": 2.467289335684322e-06, "loss": 0.6923, "step": 10405 }, { "epoch": 0.7325589581133404, "grad_norm": 1.7548274993896484, "learning_rate": 2.4660731570967273e-06, "loss": 0.7506, "step": 10406 }, { "epoch": 0.7326293558606125, "grad_norm": 1.8461021184921265, "learning_rate": 2.4648572142320975e-06, "loss": 0.6914, "step": 10407 }, { "epoch": 0.7326997536078845, "grad_norm": 1.9672750234603882, "learning_rate": 2.463641507153648e-06, "loss": 0.6193, "step": 10408 }, { "epoch": 0.7327701513551567, "grad_norm": 1.6489132642745972, "learning_rate": 2.462426035924585e-06, "loss": 0.6284, "step": 10409 }, { "epoch": 0.7328405491024287, "grad_norm": 1.8229918479919434, "learning_rate": 2.4612108006081e-06, "loss": 0.6176, "step": 10410 }, { "epoch": 0.7329109468497008, "grad_norm": 2.0706944465637207, "learning_rate": 2.4599958012673783e-06, "loss": 0.5642, "step": 10411 }, { "epoch": 0.732981344596973, "grad_norm": 1.4796069860458374, "learning_rate": 2.4587810379655835e-06, "loss": 0.4968, "step": 10412 }, { "epoch": 0.733051742344245, "grad_norm": 1.9044735431671143, "learning_rate": 2.4575665107658786e-06, "loss": 0.7521, "step": 10413 }, { "epoch": 0.7331221400915171, "grad_norm": 1.8849941492080688, "learning_rate": 2.4563522197314044e-06, "loss": 0.7458, "step": 10414 }, { "epoch": 0.7331925378387891, "grad_norm": 1.8751345872879028, "learning_rate": 2.455138164925293e-06, "loss": 0.7251, "step": 10415 }, { "epoch": 0.7332629355860613, "grad_norm": 1.7583576440811157, "learning_rate": 2.453924346410662e-06, "loss": 0.6083, "step": 10416 }, { "epoch": 0.7333333333333333, "grad_norm": 1.8714901208877563, "learning_rate": 2.4527107642506243e-06, "loss": 0.6034, "step": 10417 }, { "epoch": 0.7334037310806054, "grad_norm": 1.7321245670318604, "learning_rate": 2.4514974185082696e-06, "loss": 0.6375, "step": 10418 }, { "epoch": 0.7334741288278775, "grad_norm": 1.9577374458312988, "learning_rate": 2.450284309246686e-06, "loss": 0.691, "step": 10419 }, { "epoch": 0.7335445265751496, "grad_norm": 1.9070218801498413, "learning_rate": 2.4490714365289417e-06, "loss": 0.5542, "step": 10420 }, { "epoch": 0.7336149243224217, "grad_norm": 2.037572145462036, "learning_rate": 2.4478588004180946e-06, "loss": 0.5704, "step": 10421 }, { "epoch": 0.7336853220696937, "grad_norm": 1.761855125427246, "learning_rate": 2.446646400977189e-06, "loss": 0.5766, "step": 10422 }, { "epoch": 0.7337557198169659, "grad_norm": 2.293776512145996, "learning_rate": 2.445434238269263e-06, "loss": 0.5806, "step": 10423 }, { "epoch": 0.733826117564238, "grad_norm": 1.8481738567352295, "learning_rate": 2.444222312357333e-06, "loss": 0.6725, "step": 10424 }, { "epoch": 0.73389651531151, "grad_norm": 1.9842244386672974, "learning_rate": 2.4430106233044146e-06, "loss": 0.6621, "step": 10425 }, { "epoch": 0.7339669130587821, "grad_norm": 1.6422535181045532, "learning_rate": 2.4417991711735e-06, "loss": 0.7635, "step": 10426 }, { "epoch": 0.7340373108060542, "grad_norm": 1.657405972480774, "learning_rate": 2.4405879560275746e-06, "loss": 0.517, "step": 10427 }, { "epoch": 0.7341077085533263, "grad_norm": 2.2505364418029785, "learning_rate": 2.439376977929608e-06, "loss": 0.6319, "step": 10428 }, { "epoch": 0.7341781063005984, "grad_norm": 1.943570613861084, "learning_rate": 2.4381662369425657e-06, "loss": 0.5691, "step": 10429 }, { "epoch": 0.7342485040478705, "grad_norm": 1.802017331123352, "learning_rate": 2.43695573312939e-06, "loss": 0.6559, "step": 10430 }, { "epoch": 0.7343189017951426, "grad_norm": 2.0881924629211426, "learning_rate": 2.43574546655302e-06, "loss": 0.6334, "step": 10431 }, { "epoch": 0.7343892995424146, "grad_norm": 1.638030767440796, "learning_rate": 2.434535437276377e-06, "loss": 0.6549, "step": 10432 }, { "epoch": 0.7344596972896867, "grad_norm": 1.8457937240600586, "learning_rate": 2.433325645362372e-06, "loss": 0.6206, "step": 10433 }, { "epoch": 0.7345300950369588, "grad_norm": 1.982024908065796, "learning_rate": 2.4321160908738992e-06, "loss": 0.6987, "step": 10434 }, { "epoch": 0.7346004927842309, "grad_norm": 1.9216201305389404, "learning_rate": 2.430906773873848e-06, "loss": 0.7064, "step": 10435 }, { "epoch": 0.734670890531503, "grad_norm": 1.777143120765686, "learning_rate": 2.429697694425094e-06, "loss": 0.6104, "step": 10436 }, { "epoch": 0.734741288278775, "grad_norm": 2.3372867107391357, "learning_rate": 2.4284888525904962e-06, "loss": 0.6182, "step": 10437 }, { "epoch": 0.7348116860260472, "grad_norm": 2.173945665359497, "learning_rate": 2.427280248432903e-06, "loss": 0.7391, "step": 10438 }, { "epoch": 0.7348820837733192, "grad_norm": 1.961911916732788, "learning_rate": 2.4260718820151486e-06, "loss": 0.6536, "step": 10439 }, { "epoch": 0.7349524815205913, "grad_norm": 1.8809400796890259, "learning_rate": 2.4248637534000625e-06, "loss": 0.6689, "step": 10440 }, { "epoch": 0.7350228792678635, "grad_norm": 2.57781982421875, "learning_rate": 2.42365586265045e-06, "loss": 0.6337, "step": 10441 }, { "epoch": 0.7350932770151355, "grad_norm": 1.8725321292877197, "learning_rate": 2.4224482098291162e-06, "loss": 0.7654, "step": 10442 }, { "epoch": 0.7351636747624076, "grad_norm": 1.9231656789779663, "learning_rate": 2.4212407949988464e-06, "loss": 0.6708, "step": 10443 }, { "epoch": 0.7352340725096796, "grad_norm": 2.1322684288024902, "learning_rate": 2.4200336182224134e-06, "loss": 0.6746, "step": 10444 }, { "epoch": 0.7353044702569518, "grad_norm": 1.753417730331421, "learning_rate": 2.4188266795625793e-06, "loss": 0.6991, "step": 10445 }, { "epoch": 0.7353748680042239, "grad_norm": 1.6706979274749756, "learning_rate": 2.4176199790820962e-06, "loss": 0.5726, "step": 10446 }, { "epoch": 0.7354452657514959, "grad_norm": 2.7278974056243896, "learning_rate": 2.4164135168436988e-06, "loss": 0.5844, "step": 10447 }, { "epoch": 0.7355156634987681, "grad_norm": 1.7401866912841797, "learning_rate": 2.415207292910116e-06, "loss": 0.6045, "step": 10448 }, { "epoch": 0.7355860612460401, "grad_norm": 4.297787666320801, "learning_rate": 2.4140013073440587e-06, "loss": 0.7002, "step": 10449 }, { "epoch": 0.7356564589933122, "grad_norm": 1.51296067237854, "learning_rate": 2.4127955602082265e-06, "loss": 0.7967, "step": 10450 }, { "epoch": 0.7357268567405844, "grad_norm": 1.900515079498291, "learning_rate": 2.411590051565305e-06, "loss": 0.608, "step": 10451 }, { "epoch": 0.7357972544878564, "grad_norm": 1.6434049606323242, "learning_rate": 2.4103847814779748e-06, "loss": 0.597, "step": 10452 }, { "epoch": 0.7358676522351285, "grad_norm": 1.8188968896865845, "learning_rate": 2.409179750008894e-06, "loss": 0.6282, "step": 10453 }, { "epoch": 0.7359380499824005, "grad_norm": 1.8928173780441284, "learning_rate": 2.407974957220718e-06, "loss": 0.5754, "step": 10454 }, { "epoch": 0.7360084477296727, "grad_norm": 1.8666331768035889, "learning_rate": 2.4067704031760828e-06, "loss": 0.645, "step": 10455 }, { "epoch": 0.7360788454769447, "grad_norm": 1.9609216451644897, "learning_rate": 2.405566087937614e-06, "loss": 0.6516, "step": 10456 }, { "epoch": 0.7361492432242168, "grad_norm": 1.9499157667160034, "learning_rate": 2.4043620115679226e-06, "loss": 0.7342, "step": 10457 }, { "epoch": 0.736219640971489, "grad_norm": 2.0044138431549072, "learning_rate": 2.4031581741296153e-06, "loss": 0.7563, "step": 10458 }, { "epoch": 0.736290038718761, "grad_norm": 2.0916850566864014, "learning_rate": 2.401954575685275e-06, "loss": 0.645, "step": 10459 }, { "epoch": 0.7363604364660331, "grad_norm": 1.6393896341323853, "learning_rate": 2.400751216297483e-06, "loss": 0.6212, "step": 10460 }, { "epoch": 0.7364308342133051, "grad_norm": 2.1003224849700928, "learning_rate": 2.399548096028801e-06, "loss": 0.7691, "step": 10461 }, { "epoch": 0.7365012319605773, "grad_norm": 1.6804018020629883, "learning_rate": 2.398345214941779e-06, "loss": 0.6692, "step": 10462 }, { "epoch": 0.7365716297078494, "grad_norm": 1.8765761852264404, "learning_rate": 2.397142573098955e-06, "loss": 0.7727, "step": 10463 }, { "epoch": 0.7366420274551214, "grad_norm": 1.759555459022522, "learning_rate": 2.39594017056286e-06, "loss": 0.5348, "step": 10464 }, { "epoch": 0.7367124252023936, "grad_norm": 2.049685478210449, "learning_rate": 2.394738007396002e-06, "loss": 0.6694, "step": 10465 }, { "epoch": 0.7367828229496656, "grad_norm": 1.8037489652633667, "learning_rate": 2.3935360836608887e-06, "loss": 0.7069, "step": 10466 }, { "epoch": 0.7368532206969377, "grad_norm": 1.8997001647949219, "learning_rate": 2.392334399420006e-06, "loss": 0.6244, "step": 10467 }, { "epoch": 0.7369236184442098, "grad_norm": 1.7653170824050903, "learning_rate": 2.391132954735831e-06, "loss": 0.6293, "step": 10468 }, { "epoch": 0.7369940161914819, "grad_norm": 1.9489895105361938, "learning_rate": 2.389931749670825e-06, "loss": 0.742, "step": 10469 }, { "epoch": 0.737064413938754, "grad_norm": 1.6220675706863403, "learning_rate": 2.388730784287445e-06, "loss": 0.6482, "step": 10470 }, { "epoch": 0.737134811686026, "grad_norm": 1.9206750392913818, "learning_rate": 2.387530058648126e-06, "loss": 0.631, "step": 10471 }, { "epoch": 0.7372052094332981, "grad_norm": 2.1477701663970947, "learning_rate": 2.386329572815298e-06, "loss": 0.7374, "step": 10472 }, { "epoch": 0.7372756071805702, "grad_norm": 1.7043741941452026, "learning_rate": 2.3851293268513737e-06, "loss": 0.604, "step": 10473 }, { "epoch": 0.7373460049278423, "grad_norm": 2.0087103843688965, "learning_rate": 2.3839293208187562e-06, "loss": 0.595, "step": 10474 }, { "epoch": 0.7374164026751144, "grad_norm": 1.8091349601745605, "learning_rate": 2.3827295547798305e-06, "loss": 0.5985, "step": 10475 }, { "epoch": 0.7374868004223865, "grad_norm": 1.6196082830429077, "learning_rate": 2.38153002879698e-06, "loss": 0.7008, "step": 10476 }, { "epoch": 0.7375571981696586, "grad_norm": 1.8437230587005615, "learning_rate": 2.380330742932563e-06, "loss": 0.725, "step": 10477 }, { "epoch": 0.7376275959169306, "grad_norm": 2.1074459552764893, "learning_rate": 2.379131697248938e-06, "loss": 0.6177, "step": 10478 }, { "epoch": 0.7376979936642027, "grad_norm": 1.6842234134674072, "learning_rate": 2.37793289180844e-06, "loss": 0.7677, "step": 10479 }, { "epoch": 0.7377683914114749, "grad_norm": 1.9509350061416626, "learning_rate": 2.3767343266733974e-06, "loss": 0.7474, "step": 10480 }, { "epoch": 0.7378387891587469, "grad_norm": 1.9238464832305908, "learning_rate": 2.3755360019061215e-06, "loss": 0.6049, "step": 10481 }, { "epoch": 0.737909186906019, "grad_norm": 1.7382951974868774, "learning_rate": 2.374337917568919e-06, "loss": 0.6251, "step": 10482 }, { "epoch": 0.737979584653291, "grad_norm": 1.696651577949524, "learning_rate": 2.3731400737240766e-06, "loss": 0.6367, "step": 10483 }, { "epoch": 0.7380499824005632, "grad_norm": 1.550777554512024, "learning_rate": 2.3719424704338704e-06, "loss": 0.5493, "step": 10484 }, { "epoch": 0.7381203801478353, "grad_norm": 1.930772304534912, "learning_rate": 2.370745107760572e-06, "loss": 0.7781, "step": 10485 }, { "epoch": 0.7381907778951073, "grad_norm": 1.7199431657791138, "learning_rate": 2.3695479857664236e-06, "loss": 0.5549, "step": 10486 }, { "epoch": 0.7382611756423795, "grad_norm": 2.1171786785125732, "learning_rate": 2.368351104513671e-06, "loss": 0.625, "step": 10487 }, { "epoch": 0.7383315733896515, "grad_norm": 1.7607247829437256, "learning_rate": 2.3671544640645362e-06, "loss": 0.6375, "step": 10488 }, { "epoch": 0.7384019711369236, "grad_norm": 1.806304693222046, "learning_rate": 2.36595806448124e-06, "loss": 0.6446, "step": 10489 }, { "epoch": 0.7384723688841957, "grad_norm": 2.137054204940796, "learning_rate": 2.3647619058259782e-06, "loss": 0.6451, "step": 10490 }, { "epoch": 0.7385427666314678, "grad_norm": 1.986095905303955, "learning_rate": 2.363565988160947e-06, "loss": 0.7649, "step": 10491 }, { "epoch": 0.7386131643787399, "grad_norm": 1.7595442533493042, "learning_rate": 2.3623703115483145e-06, "loss": 0.5946, "step": 10492 }, { "epoch": 0.7386835621260119, "grad_norm": 1.5982320308685303, "learning_rate": 2.361174876050252e-06, "loss": 0.5926, "step": 10493 }, { "epoch": 0.7387539598732841, "grad_norm": 1.5866200923919678, "learning_rate": 2.3599796817289076e-06, "loss": 0.6657, "step": 10494 }, { "epoch": 0.7388243576205561, "grad_norm": 2.4669578075408936, "learning_rate": 2.3587847286464224e-06, "loss": 0.6072, "step": 10495 }, { "epoch": 0.7388947553678282, "grad_norm": 1.8730976581573486, "learning_rate": 2.3575900168649217e-06, "loss": 0.6237, "step": 10496 }, { "epoch": 0.7389651531151004, "grad_norm": 2.230947494506836, "learning_rate": 2.3563955464465246e-06, "loss": 0.6387, "step": 10497 }, { "epoch": 0.7390355508623724, "grad_norm": 1.91227388381958, "learning_rate": 2.3552013174533235e-06, "loss": 0.607, "step": 10498 }, { "epoch": 0.7391059486096445, "grad_norm": 1.7398139238357544, "learning_rate": 2.354007329947415e-06, "loss": 0.7235, "step": 10499 }, { "epoch": 0.7391763463569165, "grad_norm": 2.238408088684082, "learning_rate": 2.3528135839908706e-06, "loss": 0.6318, "step": 10500 }, { "epoch": 0.7392467441041887, "grad_norm": 1.6764370203018188, "learning_rate": 2.351620079645759e-06, "loss": 0.642, "step": 10501 }, { "epoch": 0.7393171418514608, "grad_norm": 2.0645358562469482, "learning_rate": 2.350426816974127e-06, "loss": 0.6464, "step": 10502 }, { "epoch": 0.7393875395987328, "grad_norm": 1.9840205907821655, "learning_rate": 2.34923379603802e-06, "loss": 0.6137, "step": 10503 }, { "epoch": 0.739457937346005, "grad_norm": 1.9744752645492554, "learning_rate": 2.3480410168994566e-06, "loss": 0.61, "step": 10504 }, { "epoch": 0.739528335093277, "grad_norm": 2.0628254413604736, "learning_rate": 2.3468484796204545e-06, "loss": 0.6675, "step": 10505 }, { "epoch": 0.7395987328405491, "grad_norm": 1.928671956062317, "learning_rate": 2.345656184263013e-06, "loss": 0.7155, "step": 10506 }, { "epoch": 0.7396691305878212, "grad_norm": 1.8981844186782837, "learning_rate": 2.3444641308891236e-06, "loss": 0.6502, "step": 10507 }, { "epoch": 0.7397395283350933, "grad_norm": 1.9040600061416626, "learning_rate": 2.3432723195607576e-06, "loss": 0.6461, "step": 10508 }, { "epoch": 0.7398099260823654, "grad_norm": 1.9066990613937378, "learning_rate": 2.342080750339884e-06, "loss": 0.6429, "step": 10509 }, { "epoch": 0.7398803238296374, "grad_norm": 1.5089294910430908, "learning_rate": 2.3408894232884505e-06, "loss": 0.5803, "step": 10510 }, { "epoch": 0.7399507215769096, "grad_norm": 2.0545363426208496, "learning_rate": 2.339698338468395e-06, "loss": 0.6374, "step": 10511 }, { "epoch": 0.7400211193241816, "grad_norm": 1.8673263788223267, "learning_rate": 2.3385074959416418e-06, "loss": 0.7099, "step": 10512 }, { "epoch": 0.7400915170714537, "grad_norm": 2.248331308364868, "learning_rate": 2.337316895770107e-06, "loss": 0.6058, "step": 10513 }, { "epoch": 0.7401619148187258, "grad_norm": 1.9380592107772827, "learning_rate": 2.3361265380156886e-06, "loss": 0.645, "step": 10514 }, { "epoch": 0.7402323125659979, "grad_norm": 3.016052484512329, "learning_rate": 2.3349364227402764e-06, "loss": 0.6806, "step": 10515 }, { "epoch": 0.74030271031327, "grad_norm": 2.0433542728424072, "learning_rate": 2.3337465500057453e-06, "loss": 0.7767, "step": 10516 }, { "epoch": 0.740373108060542, "grad_norm": 1.9028315544128418, "learning_rate": 2.332556919873957e-06, "loss": 0.5916, "step": 10517 }, { "epoch": 0.7404435058078142, "grad_norm": 1.9162169694900513, "learning_rate": 2.3313675324067586e-06, "loss": 0.6761, "step": 10518 }, { "epoch": 0.7405139035550863, "grad_norm": 2.0634725093841553, "learning_rate": 2.330178387665993e-06, "loss": 0.5784, "step": 10519 }, { "epoch": 0.7405843013023583, "grad_norm": 1.7845202684402466, "learning_rate": 2.3289894857134803e-06, "loss": 0.6806, "step": 10520 }, { "epoch": 0.7406546990496304, "grad_norm": 1.9373652935028076, "learning_rate": 2.3278008266110367e-06, "loss": 0.635, "step": 10521 }, { "epoch": 0.7407250967969025, "grad_norm": 1.8783118724822998, "learning_rate": 2.3266124104204593e-06, "loss": 0.6907, "step": 10522 }, { "epoch": 0.7407954945441746, "grad_norm": 1.87022864818573, "learning_rate": 2.325424237203535e-06, "loss": 0.6094, "step": 10523 }, { "epoch": 0.7408658922914467, "grad_norm": 2.2441649436950684, "learning_rate": 2.3242363070220366e-06, "loss": 0.6534, "step": 10524 }, { "epoch": 0.7409362900387187, "grad_norm": 1.7782258987426758, "learning_rate": 2.323048619937729e-06, "loss": 0.5735, "step": 10525 }, { "epoch": 0.7410066877859909, "grad_norm": 1.98042893409729, "learning_rate": 2.321861176012357e-06, "loss": 0.6348, "step": 10526 }, { "epoch": 0.7410770855332629, "grad_norm": 2.097153425216675, "learning_rate": 2.3206739753076612e-06, "loss": 0.6947, "step": 10527 }, { "epoch": 0.741147483280535, "grad_norm": 1.954114556312561, "learning_rate": 2.319487017885364e-06, "loss": 0.6938, "step": 10528 }, { "epoch": 0.7412178810278071, "grad_norm": 1.9059178829193115, "learning_rate": 2.3183003038071752e-06, "loss": 0.5762, "step": 10529 }, { "epoch": 0.7412882787750792, "grad_norm": 2.0093929767608643, "learning_rate": 2.3171138331347912e-06, "loss": 0.6769, "step": 10530 }, { "epoch": 0.7413586765223513, "grad_norm": 2.3364927768707275, "learning_rate": 2.315927605929903e-06, "loss": 0.5081, "step": 10531 }, { "epoch": 0.7414290742696233, "grad_norm": 1.8921394348144531, "learning_rate": 2.314741622254178e-06, "loss": 0.6415, "step": 10532 }, { "epoch": 0.7414994720168955, "grad_norm": 1.791669487953186, "learning_rate": 2.3135558821692817e-06, "loss": 0.636, "step": 10533 }, { "epoch": 0.7415698697641675, "grad_norm": 1.988541603088379, "learning_rate": 2.3123703857368595e-06, "loss": 0.6799, "step": 10534 }, { "epoch": 0.7416402675114396, "grad_norm": 1.7247769832611084, "learning_rate": 2.311185133018544e-06, "loss": 0.6291, "step": 10535 }, { "epoch": 0.7417106652587118, "grad_norm": 1.8353080749511719, "learning_rate": 2.310000124075961e-06, "loss": 0.7331, "step": 10536 }, { "epoch": 0.7417810630059838, "grad_norm": 1.6279698610305786, "learning_rate": 2.3088153589707182e-06, "loss": 0.6396, "step": 10537 }, { "epoch": 0.7418514607532559, "grad_norm": 1.8527170419692993, "learning_rate": 2.3076308377644156e-06, "loss": 0.6358, "step": 10538 }, { "epoch": 0.7419218585005279, "grad_norm": 1.7349870204925537, "learning_rate": 2.3064465605186353e-06, "loss": 0.6981, "step": 10539 }, { "epoch": 0.7419922562478001, "grad_norm": 2.0764689445495605, "learning_rate": 2.3052625272949486e-06, "loss": 0.6935, "step": 10540 }, { "epoch": 0.7420626539950722, "grad_norm": 1.9295141696929932, "learning_rate": 2.3040787381549133e-06, "loss": 0.7102, "step": 10541 }, { "epoch": 0.7421330517423442, "grad_norm": 1.8588615655899048, "learning_rate": 2.3028951931600785e-06, "loss": 0.7562, "step": 10542 }, { "epoch": 0.7422034494896164, "grad_norm": 2.0266120433807373, "learning_rate": 2.301711892371975e-06, "loss": 0.6798, "step": 10543 }, { "epoch": 0.7422738472368884, "grad_norm": 1.7685116529464722, "learning_rate": 2.3005288358521278e-06, "loss": 0.6263, "step": 10544 }, { "epoch": 0.7423442449841605, "grad_norm": 1.847768783569336, "learning_rate": 2.299346023662042e-06, "loss": 0.6433, "step": 10545 }, { "epoch": 0.7424146427314325, "grad_norm": 1.7625268697738647, "learning_rate": 2.2981634558632138e-06, "loss": 0.7161, "step": 10546 }, { "epoch": 0.7424850404787047, "grad_norm": 1.9597043991088867, "learning_rate": 2.2969811325171237e-06, "loss": 0.663, "step": 10547 }, { "epoch": 0.7425554382259768, "grad_norm": 1.7411106824874878, "learning_rate": 2.295799053685246e-06, "loss": 0.7304, "step": 10548 }, { "epoch": 0.7426258359732488, "grad_norm": 1.8142436742782593, "learning_rate": 2.2946172194290327e-06, "loss": 0.6375, "step": 10549 }, { "epoch": 0.742696233720521, "grad_norm": 2.066965341567993, "learning_rate": 2.2934356298099353e-06, "loss": 0.5805, "step": 10550 }, { "epoch": 0.742766631467793, "grad_norm": 3.818808078765869, "learning_rate": 2.292254284889381e-06, "loss": 0.692, "step": 10551 }, { "epoch": 0.7428370292150651, "grad_norm": 1.6014583110809326, "learning_rate": 2.291073184728791e-06, "loss": 0.5261, "step": 10552 }, { "epoch": 0.7429074269623372, "grad_norm": 1.7680667638778687, "learning_rate": 2.2898923293895677e-06, "loss": 0.5833, "step": 10553 }, { "epoch": 0.7429778247096093, "grad_norm": 1.8301472663879395, "learning_rate": 2.28871171893311e-06, "loss": 0.7125, "step": 10554 }, { "epoch": 0.7430482224568814, "grad_norm": 2.466336250305176, "learning_rate": 2.2875313534207956e-06, "loss": 0.6305, "step": 10555 }, { "epoch": 0.7431186202041534, "grad_norm": 2.06211256980896, "learning_rate": 2.286351232913995e-06, "loss": 0.6594, "step": 10556 }, { "epoch": 0.7431890179514256, "grad_norm": 1.9201442003250122, "learning_rate": 2.2851713574740635e-06, "loss": 0.666, "step": 10557 }, { "epoch": 0.7432594156986977, "grad_norm": 1.6814101934432983, "learning_rate": 2.2839917271623426e-06, "loss": 0.6159, "step": 10558 }, { "epoch": 0.7433298134459697, "grad_norm": 1.7780567407608032, "learning_rate": 2.282812342040161e-06, "loss": 0.5419, "step": 10559 }, { "epoch": 0.7434002111932418, "grad_norm": 1.772312045097351, "learning_rate": 2.2816332021688406e-06, "loss": 0.5968, "step": 10560 }, { "epoch": 0.7434706089405139, "grad_norm": 1.8614624738693237, "learning_rate": 2.2804543076096807e-06, "loss": 0.6431, "step": 10561 }, { "epoch": 0.743541006687786, "grad_norm": 1.885438323020935, "learning_rate": 2.2792756584239776e-06, "loss": 0.6501, "step": 10562 }, { "epoch": 0.7436114044350581, "grad_norm": 1.694577693939209, "learning_rate": 2.278097254673008e-06, "loss": 0.6575, "step": 10563 }, { "epoch": 0.7436818021823302, "grad_norm": 1.7613595724105835, "learning_rate": 2.276919096418039e-06, "loss": 0.7041, "step": 10564 }, { "epoch": 0.7437521999296023, "grad_norm": 1.8277180194854736, "learning_rate": 2.2757411837203216e-06, "loss": 0.5762, "step": 10565 }, { "epoch": 0.7438225976768743, "grad_norm": 2.2678117752075195, "learning_rate": 2.2745635166411e-06, "loss": 0.745, "step": 10566 }, { "epoch": 0.7438929954241464, "grad_norm": 1.979601263999939, "learning_rate": 2.2733860952415994e-06, "loss": 0.6183, "step": 10567 }, { "epoch": 0.7439633931714185, "grad_norm": 2.06463885307312, "learning_rate": 2.2722089195830385e-06, "loss": 0.6305, "step": 10568 }, { "epoch": 0.7440337909186906, "grad_norm": 1.4106758832931519, "learning_rate": 2.2710319897266177e-06, "loss": 0.6743, "step": 10569 }, { "epoch": 0.7441041886659627, "grad_norm": 1.9700182676315308, "learning_rate": 2.269855305733526e-06, "loss": 0.5969, "step": 10570 }, { "epoch": 0.7441745864132348, "grad_norm": 2.176574945449829, "learning_rate": 2.268678867664939e-06, "loss": 0.7174, "step": 10571 }, { "epoch": 0.7442449841605069, "grad_norm": 2.255629301071167, "learning_rate": 2.2675026755820247e-06, "loss": 0.7125, "step": 10572 }, { "epoch": 0.7443153819077789, "grad_norm": 1.8950474262237549, "learning_rate": 2.2663267295459306e-06, "loss": 0.6181, "step": 10573 }, { "epoch": 0.744385779655051, "grad_norm": 1.5122852325439453, "learning_rate": 2.2651510296177984e-06, "loss": 0.6265, "step": 10574 }, { "epoch": 0.7444561774023232, "grad_norm": 1.7592151165008545, "learning_rate": 2.2639755758587528e-06, "loss": 0.7046, "step": 10575 }, { "epoch": 0.7445265751495952, "grad_norm": 1.8019400835037231, "learning_rate": 2.262800368329906e-06, "loss": 0.6763, "step": 10576 }, { "epoch": 0.7445969728968673, "grad_norm": 1.781788945198059, "learning_rate": 2.2616254070923554e-06, "loss": 0.6215, "step": 10577 }, { "epoch": 0.7446673706441393, "grad_norm": 1.973240613937378, "learning_rate": 2.260450692207194e-06, "loss": 0.6346, "step": 10578 }, { "epoch": 0.7447377683914115, "grad_norm": 2.031369924545288, "learning_rate": 2.2592762237354918e-06, "loss": 0.6733, "step": 10579 }, { "epoch": 0.7448081661386836, "grad_norm": 1.998897671699524, "learning_rate": 2.258102001738314e-06, "loss": 0.5662, "step": 10580 }, { "epoch": 0.7448785638859556, "grad_norm": 2.1013376712799072, "learning_rate": 2.256928026276708e-06, "loss": 0.6428, "step": 10581 }, { "epoch": 0.7449489616332278, "grad_norm": 2.278740882873535, "learning_rate": 2.255754297411709e-06, "loss": 0.6931, "step": 10582 }, { "epoch": 0.7450193593804998, "grad_norm": 1.7296150922775269, "learning_rate": 2.254580815204338e-06, "loss": 0.6949, "step": 10583 }, { "epoch": 0.7450897571277719, "grad_norm": 1.907201886177063, "learning_rate": 2.253407579715611e-06, "loss": 0.6387, "step": 10584 }, { "epoch": 0.745160154875044, "grad_norm": 2.2260994911193848, "learning_rate": 2.252234591006521e-06, "loss": 0.7017, "step": 10585 }, { "epoch": 0.7452305526223161, "grad_norm": 1.9866023063659668, "learning_rate": 2.251061849138054e-06, "loss": 0.607, "step": 10586 }, { "epoch": 0.7453009503695882, "grad_norm": 2.24946928024292, "learning_rate": 2.2498893541711866e-06, "loss": 0.7478, "step": 10587 }, { "epoch": 0.7453713481168602, "grad_norm": 1.8091965913772583, "learning_rate": 2.2487171061668686e-06, "loss": 0.6335, "step": 10588 }, { "epoch": 0.7454417458641324, "grad_norm": 2.271479606628418, "learning_rate": 2.2475451051860547e-06, "loss": 0.6173, "step": 10589 }, { "epoch": 0.7455121436114044, "grad_norm": 1.828674077987671, "learning_rate": 2.2463733512896723e-06, "loss": 0.7544, "step": 10590 }, { "epoch": 0.7455825413586765, "grad_norm": 1.7478679418563843, "learning_rate": 2.2452018445386465e-06, "loss": 0.692, "step": 10591 }, { "epoch": 0.7456529391059487, "grad_norm": 1.9546513557434082, "learning_rate": 2.2440305849938805e-06, "loss": 0.6129, "step": 10592 }, { "epoch": 0.7457233368532207, "grad_norm": 2.332404136657715, "learning_rate": 2.2428595727162765e-06, "loss": 0.8092, "step": 10593 }, { "epoch": 0.7457937346004928, "grad_norm": 1.8025412559509277, "learning_rate": 2.241688807766707e-06, "loss": 0.6384, "step": 10594 }, { "epoch": 0.7458641323477648, "grad_norm": 1.826151728630066, "learning_rate": 2.240518290206048e-06, "loss": 0.6578, "step": 10595 }, { "epoch": 0.745934530095037, "grad_norm": 1.936958909034729, "learning_rate": 2.239348020095151e-06, "loss": 0.6509, "step": 10596 }, { "epoch": 0.7460049278423091, "grad_norm": 1.381302833557129, "learning_rate": 2.2381779974948647e-06, "loss": 0.6808, "step": 10597 }, { "epoch": 0.7460753255895811, "grad_norm": 1.9236586093902588, "learning_rate": 2.237008222466015e-06, "loss": 0.6144, "step": 10598 }, { "epoch": 0.7461457233368533, "grad_norm": 2.0570061206817627, "learning_rate": 2.2358386950694255e-06, "loss": 0.7297, "step": 10599 }, { "epoch": 0.7462161210841253, "grad_norm": 2.3556807041168213, "learning_rate": 2.2346694153658916e-06, "loss": 0.7333, "step": 10600 }, { "epoch": 0.7462865188313974, "grad_norm": 1.9679410457611084, "learning_rate": 2.233500383416214e-06, "loss": 0.6945, "step": 10601 }, { "epoch": 0.7463569165786695, "grad_norm": 2.162771224975586, "learning_rate": 2.232331599281165e-06, "loss": 0.6105, "step": 10602 }, { "epoch": 0.7464273143259416, "grad_norm": 2.0052895545959473, "learning_rate": 2.2311630630215175e-06, "loss": 0.5834, "step": 10603 }, { "epoch": 0.7464977120732137, "grad_norm": 1.9079086780548096, "learning_rate": 2.229994774698018e-06, "loss": 0.629, "step": 10604 }, { "epoch": 0.7465681098204857, "grad_norm": 2.191671371459961, "learning_rate": 2.228826734371415e-06, "loss": 0.709, "step": 10605 }, { "epoch": 0.7466385075677578, "grad_norm": 1.7414783239364624, "learning_rate": 2.2276589421024264e-06, "loss": 0.7053, "step": 10606 }, { "epoch": 0.7467089053150299, "grad_norm": 2.101712226867676, "learning_rate": 2.226491397951774e-06, "loss": 0.6421, "step": 10607 }, { "epoch": 0.746779303062302, "grad_norm": 1.9467554092407227, "learning_rate": 2.2253241019801545e-06, "loss": 0.6301, "step": 10608 }, { "epoch": 0.7468497008095741, "grad_norm": 1.9562954902648926, "learning_rate": 2.2241570542482612e-06, "loss": 0.6376, "step": 10609 }, { "epoch": 0.7469200985568462, "grad_norm": 1.6970778703689575, "learning_rate": 2.2229902548167657e-06, "loss": 0.5484, "step": 10610 }, { "epoch": 0.7469904963041183, "grad_norm": 2.2862560749053955, "learning_rate": 2.2218237037463387e-06, "loss": 0.6886, "step": 10611 }, { "epoch": 0.7470608940513903, "grad_norm": 1.7352159023284912, "learning_rate": 2.2206574010976186e-06, "loss": 0.6228, "step": 10612 }, { "epoch": 0.7471312917986624, "grad_norm": 1.8268035650253296, "learning_rate": 2.2194913469312524e-06, "loss": 0.63, "step": 10613 }, { "epoch": 0.7472016895459346, "grad_norm": 2.1068100929260254, "learning_rate": 2.2183255413078577e-06, "loss": 0.6312, "step": 10614 }, { "epoch": 0.7472720872932066, "grad_norm": 3.098752498626709, "learning_rate": 2.2171599842880503e-06, "loss": 0.649, "step": 10615 }, { "epoch": 0.7473424850404787, "grad_norm": 2.9756033420562744, "learning_rate": 2.2159946759324256e-06, "loss": 0.6778, "step": 10616 }, { "epoch": 0.7474128827877508, "grad_norm": 2.46358323097229, "learning_rate": 2.2148296163015723e-06, "loss": 0.6402, "step": 10617 }, { "epoch": 0.7474832805350229, "grad_norm": 1.854234218597412, "learning_rate": 2.21366480545606e-06, "loss": 0.6382, "step": 10618 }, { "epoch": 0.747553678282295, "grad_norm": 2.1957812309265137, "learning_rate": 2.2125002434564505e-06, "loss": 0.6809, "step": 10619 }, { "epoch": 0.747624076029567, "grad_norm": 1.6664396524429321, "learning_rate": 2.211335930363287e-06, "loss": 0.539, "step": 10620 }, { "epoch": 0.7476944737768392, "grad_norm": 2.0633935928344727, "learning_rate": 2.2101718662371064e-06, "loss": 0.6605, "step": 10621 }, { "epoch": 0.7477648715241112, "grad_norm": 2.3708279132843018, "learning_rate": 2.209008051138427e-06, "loss": 0.7537, "step": 10622 }, { "epoch": 0.7478352692713833, "grad_norm": 1.6324924230575562, "learning_rate": 2.20784448512776e-06, "loss": 0.6318, "step": 10623 }, { "epoch": 0.7479056670186554, "grad_norm": 2.493410348892212, "learning_rate": 2.2066811682655985e-06, "loss": 0.7097, "step": 10624 }, { "epoch": 0.7479760647659275, "grad_norm": 2.473609209060669, "learning_rate": 2.2055181006124234e-06, "loss": 0.7241, "step": 10625 }, { "epoch": 0.7480464625131996, "grad_norm": 2.0159494876861572, "learning_rate": 2.2043552822287034e-06, "loss": 0.5871, "step": 10626 }, { "epoch": 0.7481168602604716, "grad_norm": 1.7585728168487549, "learning_rate": 2.203192713174897e-06, "loss": 0.6251, "step": 10627 }, { "epoch": 0.7481872580077438, "grad_norm": 1.8980423212051392, "learning_rate": 2.2020303935114434e-06, "loss": 0.6392, "step": 10628 }, { "epoch": 0.7482576557550158, "grad_norm": 2.0884852409362793, "learning_rate": 2.2008683232987773e-06, "loss": 0.6744, "step": 10629 }, { "epoch": 0.7483280535022879, "grad_norm": 2.129077196121216, "learning_rate": 2.199706502597313e-06, "loss": 0.6864, "step": 10630 }, { "epoch": 0.7483984512495601, "grad_norm": 2.0466761589050293, "learning_rate": 2.1985449314674557e-06, "loss": 0.6641, "step": 10631 }, { "epoch": 0.7484688489968321, "grad_norm": 1.8732798099517822, "learning_rate": 2.197383609969593e-06, "loss": 0.7612, "step": 10632 }, { "epoch": 0.7485392467441042, "grad_norm": 1.8200587034225464, "learning_rate": 2.196222538164109e-06, "loss": 0.6479, "step": 10633 }, { "epoch": 0.7486096444913762, "grad_norm": 2.056102991104126, "learning_rate": 2.195061716111363e-06, "loss": 0.6624, "step": 10634 }, { "epoch": 0.7486800422386484, "grad_norm": 2.0249950885772705, "learning_rate": 2.193901143871712e-06, "loss": 0.6503, "step": 10635 }, { "epoch": 0.7487504399859205, "grad_norm": 1.961533784866333, "learning_rate": 2.192740821505494e-06, "loss": 0.6496, "step": 10636 }, { "epoch": 0.7488208377331925, "grad_norm": 1.7750868797302246, "learning_rate": 2.191580749073031e-06, "loss": 0.7344, "step": 10637 }, { "epoch": 0.7488912354804647, "grad_norm": 1.64966881275177, "learning_rate": 2.190420926634643e-06, "loss": 0.6325, "step": 10638 }, { "epoch": 0.7489616332277367, "grad_norm": 1.908503532409668, "learning_rate": 2.189261354250624e-06, "loss": 0.5313, "step": 10639 }, { "epoch": 0.7490320309750088, "grad_norm": 1.7530661821365356, "learning_rate": 2.188102031981266e-06, "loss": 0.5169, "step": 10640 }, { "epoch": 0.7491024287222808, "grad_norm": 1.8074010610580444, "learning_rate": 2.1869429598868417e-06, "loss": 0.5471, "step": 10641 }, { "epoch": 0.749172826469553, "grad_norm": 1.8718974590301514, "learning_rate": 2.185784138027612e-06, "loss": 0.6443, "step": 10642 }, { "epoch": 0.7492432242168251, "grad_norm": 1.9758082628250122, "learning_rate": 2.1846255664638225e-06, "loss": 0.6159, "step": 10643 }, { "epoch": 0.7493136219640971, "grad_norm": 1.5645431280136108, "learning_rate": 2.1834672452557133e-06, "loss": 0.6117, "step": 10644 }, { "epoch": 0.7493840197113693, "grad_norm": 1.683648943901062, "learning_rate": 2.1823091744635014e-06, "loss": 0.6039, "step": 10645 }, { "epoch": 0.7494544174586413, "grad_norm": 2.002256155014038, "learning_rate": 2.181151354147401e-06, "loss": 0.5373, "step": 10646 }, { "epoch": 0.7495248152059134, "grad_norm": 1.6662653684616089, "learning_rate": 2.179993784367606e-06, "loss": 0.6556, "step": 10647 }, { "epoch": 0.7495952129531855, "grad_norm": 1.9436935186386108, "learning_rate": 2.1788364651843002e-06, "loss": 0.6544, "step": 10648 }, { "epoch": 0.7496656107004576, "grad_norm": 2.2735185623168945, "learning_rate": 2.17767939665765e-06, "loss": 0.6164, "step": 10649 }, { "epoch": 0.7497360084477297, "grad_norm": 1.7908589839935303, "learning_rate": 2.1765225788478175e-06, "loss": 0.6308, "step": 10650 }, { "epoch": 0.7498064061950017, "grad_norm": 1.828737735748291, "learning_rate": 2.1753660118149423e-06, "loss": 0.6804, "step": 10651 }, { "epoch": 0.7498768039422739, "grad_norm": 1.8351680040359497, "learning_rate": 2.1742096956191603e-06, "loss": 0.6518, "step": 10652 }, { "epoch": 0.749947201689546, "grad_norm": 3.11674165725708, "learning_rate": 2.173053630320586e-06, "loss": 0.6343, "step": 10653 }, { "epoch": 0.750017599436818, "grad_norm": 2.0284154415130615, "learning_rate": 2.1718978159793252e-06, "loss": 0.6652, "step": 10654 }, { "epoch": 0.7500879971840901, "grad_norm": 1.9835914373397827, "learning_rate": 2.1707422526554673e-06, "loss": 0.7466, "step": 10655 }, { "epoch": 0.7501583949313622, "grad_norm": 2.1343541145324707, "learning_rate": 2.1695869404090957e-06, "loss": 0.5813, "step": 10656 }, { "epoch": 0.7502287926786343, "grad_norm": 1.7800339460372925, "learning_rate": 2.1684318793002715e-06, "loss": 0.7373, "step": 10657 }, { "epoch": 0.7502991904259064, "grad_norm": 1.8742040395736694, "learning_rate": 2.167277069389052e-06, "loss": 0.7699, "step": 10658 }, { "epoch": 0.7503695881731784, "grad_norm": 1.7654849290847778, "learning_rate": 2.166122510735474e-06, "loss": 0.7129, "step": 10659 }, { "epoch": 0.7504399859204506, "grad_norm": 2.039085626602173, "learning_rate": 2.164968203399565e-06, "loss": 0.7505, "step": 10660 }, { "epoch": 0.7505103836677226, "grad_norm": 2.0826282501220703, "learning_rate": 2.163814147441336e-06, "loss": 0.6616, "step": 10661 }, { "epoch": 0.7505807814149947, "grad_norm": 1.8307174444198608, "learning_rate": 2.162660342920791e-06, "loss": 0.5668, "step": 10662 }, { "epoch": 0.7506511791622668, "grad_norm": 2.2269411087036133, "learning_rate": 2.161506789897913e-06, "loss": 0.5919, "step": 10663 }, { "epoch": 0.7507215769095389, "grad_norm": 1.8220231533050537, "learning_rate": 2.160353488432682e-06, "loss": 0.652, "step": 10664 }, { "epoch": 0.750791974656811, "grad_norm": 1.7989200353622437, "learning_rate": 2.159200438585056e-06, "loss": 0.685, "step": 10665 }, { "epoch": 0.750862372404083, "grad_norm": 1.7695039510726929, "learning_rate": 2.158047640414983e-06, "loss": 0.6239, "step": 10666 }, { "epoch": 0.7509327701513552, "grad_norm": 2.227508068084717, "learning_rate": 2.1568950939823964e-06, "loss": 0.7655, "step": 10667 }, { "epoch": 0.7510031678986272, "grad_norm": 1.803942084312439, "learning_rate": 2.155742799347223e-06, "loss": 0.6639, "step": 10668 }, { "epoch": 0.7510735656458993, "grad_norm": 2.3394241333007812, "learning_rate": 2.1545907565693654e-06, "loss": 0.5958, "step": 10669 }, { "epoch": 0.7511439633931715, "grad_norm": 1.8669021129608154, "learning_rate": 2.1534389657087245e-06, "loss": 0.661, "step": 10670 }, { "epoch": 0.7512143611404435, "grad_norm": 2.707916259765625, "learning_rate": 2.1522874268251825e-06, "loss": 0.6391, "step": 10671 }, { "epoch": 0.7512847588877156, "grad_norm": 2.7855446338653564, "learning_rate": 2.1511361399786063e-06, "loss": 0.6529, "step": 10672 }, { "epoch": 0.7513551566349876, "grad_norm": 1.8055672645568848, "learning_rate": 2.149985105228852e-06, "loss": 0.6411, "step": 10673 }, { "epoch": 0.7514255543822598, "grad_norm": 2.174635410308838, "learning_rate": 2.1488343226357667e-06, "loss": 0.6074, "step": 10674 }, { "epoch": 0.7514959521295319, "grad_norm": 1.7411386966705322, "learning_rate": 2.147683792259176e-06, "loss": 0.6557, "step": 10675 }, { "epoch": 0.7515663498768039, "grad_norm": 1.8564995527267456, "learning_rate": 2.1465335141589016e-06, "loss": 0.6396, "step": 10676 }, { "epoch": 0.7516367476240761, "grad_norm": 2.583056926727295, "learning_rate": 2.1453834883947454e-06, "loss": 0.6752, "step": 10677 }, { "epoch": 0.7517071453713481, "grad_norm": 1.6526085138320923, "learning_rate": 2.1442337150264983e-06, "loss": 0.7642, "step": 10678 }, { "epoch": 0.7517775431186202, "grad_norm": 1.755053162574768, "learning_rate": 2.1430841941139356e-06, "loss": 0.6392, "step": 10679 }, { "epoch": 0.7518479408658922, "grad_norm": 2.0691592693328857, "learning_rate": 2.1419349257168268e-06, "loss": 0.6751, "step": 10680 }, { "epoch": 0.7519183386131644, "grad_norm": 1.9485375881195068, "learning_rate": 2.1407859098949186e-06, "loss": 0.6569, "step": 10681 }, { "epoch": 0.7519887363604365, "grad_norm": 1.9625542163848877, "learning_rate": 2.1396371467079545e-06, "loss": 0.6449, "step": 10682 }, { "epoch": 0.7520591341077085, "grad_norm": 1.7881028652191162, "learning_rate": 2.1384886362156557e-06, "loss": 0.5658, "step": 10683 }, { "epoch": 0.7521295318549807, "grad_norm": 2.0859737396240234, "learning_rate": 2.1373403784777363e-06, "loss": 0.6781, "step": 10684 }, { "epoch": 0.7521999296022527, "grad_norm": 1.7906486988067627, "learning_rate": 2.1361923735538924e-06, "loss": 0.7511, "step": 10685 }, { "epoch": 0.7522703273495248, "grad_norm": 1.7091587781906128, "learning_rate": 2.135044621503812e-06, "loss": 0.6551, "step": 10686 }, { "epoch": 0.752340725096797, "grad_norm": 3.010484218597412, "learning_rate": 2.13389712238717e-06, "loss": 0.5863, "step": 10687 }, { "epoch": 0.752411122844069, "grad_norm": 1.9513206481933594, "learning_rate": 2.1327498762636233e-06, "loss": 0.6466, "step": 10688 }, { "epoch": 0.7524815205913411, "grad_norm": 2.1491734981536865, "learning_rate": 2.1316028831928186e-06, "loss": 0.7588, "step": 10689 }, { "epoch": 0.7525519183386131, "grad_norm": 1.9867998361587524, "learning_rate": 2.1304561432343863e-06, "loss": 0.6308, "step": 10690 }, { "epoch": 0.7526223160858853, "grad_norm": 1.7818580865859985, "learning_rate": 2.1293096564479525e-06, "loss": 0.6205, "step": 10691 }, { "epoch": 0.7526927138331574, "grad_norm": 1.902833342552185, "learning_rate": 2.128163422893118e-06, "loss": 0.6652, "step": 10692 }, { "epoch": 0.7527631115804294, "grad_norm": 2.2447350025177, "learning_rate": 2.127017442629481e-06, "loss": 0.588, "step": 10693 }, { "epoch": 0.7528335093277015, "grad_norm": 2.0366477966308594, "learning_rate": 2.1258717157166194e-06, "loss": 0.6488, "step": 10694 }, { "epoch": 0.7529039070749736, "grad_norm": 1.9507791996002197, "learning_rate": 2.1247262422141052e-06, "loss": 0.6585, "step": 10695 }, { "epoch": 0.7529743048222457, "grad_norm": 2.2671337127685547, "learning_rate": 2.123581022181485e-06, "loss": 0.6024, "step": 10696 }, { "epoch": 0.7530447025695177, "grad_norm": 1.9419043064117432, "learning_rate": 2.1224360556783055e-06, "loss": 0.7349, "step": 10697 }, { "epoch": 0.7531151003167899, "grad_norm": 2.00348162651062, "learning_rate": 2.121291342764091e-06, "loss": 0.5178, "step": 10698 }, { "epoch": 0.753185498064062, "grad_norm": 2.0332655906677246, "learning_rate": 2.1201468834983605e-06, "loss": 0.6041, "step": 10699 }, { "epoch": 0.753255895811334, "grad_norm": 1.5949476957321167, "learning_rate": 2.1190026779406104e-06, "loss": 0.6013, "step": 10700 }, { "epoch": 0.7533262935586061, "grad_norm": 2.18363356590271, "learning_rate": 2.1178587261503357e-06, "loss": 0.7228, "step": 10701 }, { "epoch": 0.7533966913058782, "grad_norm": 8.992382049560547, "learning_rate": 2.1167150281870036e-06, "loss": 0.5774, "step": 10702 }, { "epoch": 0.7534670890531503, "grad_norm": 1.749864935874939, "learning_rate": 2.115571584110081e-06, "loss": 0.6571, "step": 10703 }, { "epoch": 0.7535374868004224, "grad_norm": 1.65178644657135, "learning_rate": 2.1144283939790142e-06, "loss": 0.6661, "step": 10704 }, { "epoch": 0.7536078845476945, "grad_norm": 1.9399515390396118, "learning_rate": 2.1132854578532415e-06, "loss": 0.662, "step": 10705 }, { "epoch": 0.7536782822949666, "grad_norm": 2.146806240081787, "learning_rate": 2.1121427757921815e-06, "loss": 0.6152, "step": 10706 }, { "epoch": 0.7537486800422386, "grad_norm": 1.960129976272583, "learning_rate": 2.111000347855249e-06, "loss": 0.7028, "step": 10707 }, { "epoch": 0.7538190777895107, "grad_norm": 2.1999917030334473, "learning_rate": 2.1098581741018333e-06, "loss": 0.7481, "step": 10708 }, { "epoch": 0.7538894755367829, "grad_norm": 1.5683504343032837, "learning_rate": 2.1087162545913206e-06, "loss": 0.5517, "step": 10709 }, { "epoch": 0.7539598732840549, "grad_norm": 3.1013848781585693, "learning_rate": 2.107574589383078e-06, "loss": 0.6121, "step": 10710 }, { "epoch": 0.754030271031327, "grad_norm": 2.263964891433716, "learning_rate": 2.106433178536465e-06, "loss": 0.6787, "step": 10711 }, { "epoch": 0.754100668778599, "grad_norm": 1.6593859195709229, "learning_rate": 2.1052920221108216e-06, "loss": 0.6195, "step": 10712 }, { "epoch": 0.7541710665258712, "grad_norm": 1.785494327545166, "learning_rate": 2.104151120165483e-06, "loss": 0.6188, "step": 10713 }, { "epoch": 0.7542414642731433, "grad_norm": 1.9968197345733643, "learning_rate": 2.1030104727597575e-06, "loss": 0.6705, "step": 10714 }, { "epoch": 0.7543118620204153, "grad_norm": 2.1422975063323975, "learning_rate": 2.1018700799529537e-06, "loss": 0.7425, "step": 10715 }, { "epoch": 0.7543822597676875, "grad_norm": 1.8996614217758179, "learning_rate": 2.100729941804359e-06, "loss": 0.6663, "step": 10716 }, { "epoch": 0.7544526575149595, "grad_norm": 1.6709067821502686, "learning_rate": 2.099590058373254e-06, "loss": 0.5304, "step": 10717 }, { "epoch": 0.7545230552622316, "grad_norm": 1.715179443359375, "learning_rate": 2.0984504297188976e-06, "loss": 0.6443, "step": 10718 }, { "epoch": 0.7545934530095036, "grad_norm": 1.884588360786438, "learning_rate": 2.097311055900546e-06, "loss": 0.7353, "step": 10719 }, { "epoch": 0.7546638507567758, "grad_norm": 1.81199312210083, "learning_rate": 2.0961719369774292e-06, "loss": 0.748, "step": 10720 }, { "epoch": 0.7547342485040479, "grad_norm": 1.8531882762908936, "learning_rate": 2.095033073008777e-06, "loss": 0.6225, "step": 10721 }, { "epoch": 0.7548046462513199, "grad_norm": 2.0590388774871826, "learning_rate": 2.0938944640537953e-06, "loss": 0.7258, "step": 10722 }, { "epoch": 0.7548750439985921, "grad_norm": 1.99904465675354, "learning_rate": 2.0927561101716855e-06, "loss": 0.6311, "step": 10723 }, { "epoch": 0.7549454417458641, "grad_norm": 2.2521321773529053, "learning_rate": 2.0916180114216274e-06, "loss": 0.6354, "step": 10724 }, { "epoch": 0.7550158394931362, "grad_norm": 2.3889448642730713, "learning_rate": 2.090480167862797e-06, "loss": 0.7483, "step": 10725 }, { "epoch": 0.7550862372404084, "grad_norm": 1.8436771631240845, "learning_rate": 2.089342579554348e-06, "loss": 0.5941, "step": 10726 }, { "epoch": 0.7551566349876804, "grad_norm": 2.3821160793304443, "learning_rate": 2.0882052465554266e-06, "loss": 0.591, "step": 10727 }, { "epoch": 0.7552270327349525, "grad_norm": 1.8380457162857056, "learning_rate": 2.08706816892516e-06, "loss": 0.6708, "step": 10728 }, { "epoch": 0.7552974304822245, "grad_norm": 1.6096019744873047, "learning_rate": 2.0859313467226714e-06, "loss": 0.6092, "step": 10729 }, { "epoch": 0.7553678282294967, "grad_norm": 2.3598668575286865, "learning_rate": 2.08479478000706e-06, "loss": 0.7093, "step": 10730 }, { "epoch": 0.7554382259767688, "grad_norm": 1.8610048294067383, "learning_rate": 2.083658468837422e-06, "loss": 0.6717, "step": 10731 }, { "epoch": 0.7555086237240408, "grad_norm": 1.9135003089904785, "learning_rate": 2.082522413272832e-06, "loss": 0.6158, "step": 10732 }, { "epoch": 0.755579021471313, "grad_norm": 2.163968801498413, "learning_rate": 2.081386613372356e-06, "loss": 0.5817, "step": 10733 }, { "epoch": 0.755649419218585, "grad_norm": 1.7785471677780151, "learning_rate": 2.080251069195042e-06, "loss": 0.6109, "step": 10734 }, { "epoch": 0.7557198169658571, "grad_norm": 1.9112842082977295, "learning_rate": 2.079115780799933e-06, "loss": 0.6437, "step": 10735 }, { "epoch": 0.7557902147131291, "grad_norm": 1.7798744440078735, "learning_rate": 2.077980748246048e-06, "loss": 0.5323, "step": 10736 }, { "epoch": 0.7558606124604013, "grad_norm": 1.9663434028625488, "learning_rate": 2.0768459715924053e-06, "loss": 0.7212, "step": 10737 }, { "epoch": 0.7559310102076734, "grad_norm": 1.9542757272720337, "learning_rate": 2.0757114508979978e-06, "loss": 0.5766, "step": 10738 }, { "epoch": 0.7560014079549454, "grad_norm": 1.935405969619751, "learning_rate": 2.07457718622181e-06, "loss": 0.7411, "step": 10739 }, { "epoch": 0.7560718057022175, "grad_norm": 1.9950186014175415, "learning_rate": 2.0734431776228176e-06, "loss": 0.6979, "step": 10740 }, { "epoch": 0.7561422034494896, "grad_norm": 1.958431363105774, "learning_rate": 2.072309425159974e-06, "loss": 0.6714, "step": 10741 }, { "epoch": 0.7562126011967617, "grad_norm": 2.247626304626465, "learning_rate": 2.0711759288922278e-06, "loss": 0.6496, "step": 10742 }, { "epoch": 0.7562829989440338, "grad_norm": 1.5054131746292114, "learning_rate": 2.070042688878509e-06, "loss": 0.6596, "step": 10743 }, { "epoch": 0.7563533966913059, "grad_norm": 1.738282561302185, "learning_rate": 2.068909705177735e-06, "loss": 0.5316, "step": 10744 }, { "epoch": 0.756423794438578, "grad_norm": 2.141700029373169, "learning_rate": 2.0677769778488098e-06, "loss": 0.6669, "step": 10745 }, { "epoch": 0.75649419218585, "grad_norm": 1.6373023986816406, "learning_rate": 2.066644506950629e-06, "loss": 0.5552, "step": 10746 }, { "epoch": 0.7565645899331221, "grad_norm": 2.4922585487365723, "learning_rate": 2.065512292542065e-06, "loss": 0.6121, "step": 10747 }, { "epoch": 0.7566349876803943, "grad_norm": 1.9269415140151978, "learning_rate": 2.064380334681989e-06, "loss": 0.6149, "step": 10748 }, { "epoch": 0.7567053854276663, "grad_norm": 2.179696559906006, "learning_rate": 2.063248633429249e-06, "loss": 0.685, "step": 10749 }, { "epoch": 0.7567757831749384, "grad_norm": 2.8891983032226562, "learning_rate": 2.0621171888426832e-06, "loss": 0.6363, "step": 10750 }, { "epoch": 0.7568461809222105, "grad_norm": 1.67112398147583, "learning_rate": 2.060986000981115e-06, "loss": 0.6777, "step": 10751 }, { "epoch": 0.7569165786694826, "grad_norm": 1.7114392518997192, "learning_rate": 2.05985506990336e-06, "loss": 0.6807, "step": 10752 }, { "epoch": 0.7569869764167546, "grad_norm": 1.9333758354187012, "learning_rate": 2.0587243956682115e-06, "loss": 0.8292, "step": 10753 }, { "epoch": 0.7570573741640267, "grad_norm": 1.8891880512237549, "learning_rate": 2.057593978334459e-06, "loss": 0.6404, "step": 10754 }, { "epoch": 0.7571277719112989, "grad_norm": 1.820505976676941, "learning_rate": 2.0564638179608713e-06, "loss": 0.6438, "step": 10755 }, { "epoch": 0.7571981696585709, "grad_norm": 1.8354361057281494, "learning_rate": 2.055333914606207e-06, "loss": 0.5546, "step": 10756 }, { "epoch": 0.757268567405843, "grad_norm": 2.4429869651794434, "learning_rate": 2.0542042683292087e-06, "loss": 0.6761, "step": 10757 }, { "epoch": 0.757338965153115, "grad_norm": 1.913644552230835, "learning_rate": 2.0530748791886117e-06, "loss": 0.7116, "step": 10758 }, { "epoch": 0.7574093629003872, "grad_norm": 1.7810856103897095, "learning_rate": 2.0519457472431296e-06, "loss": 0.5339, "step": 10759 }, { "epoch": 0.7574797606476593, "grad_norm": 1.9395297765731812, "learning_rate": 2.050816872551472e-06, "loss": 0.5888, "step": 10760 }, { "epoch": 0.7575501583949313, "grad_norm": 1.8000385761260986, "learning_rate": 2.049688255172327e-06, "loss": 0.6642, "step": 10761 }, { "epoch": 0.7576205561422035, "grad_norm": 2.2178032398223877, "learning_rate": 2.048559895164373e-06, "loss": 0.6894, "step": 10762 }, { "epoch": 0.7576909538894755, "grad_norm": 2.024202585220337, "learning_rate": 2.047431792586273e-06, "loss": 0.6284, "step": 10763 }, { "epoch": 0.7577613516367476, "grad_norm": 2.4069342613220215, "learning_rate": 2.04630394749668e-06, "loss": 0.6647, "step": 10764 }, { "epoch": 0.7578317493840198, "grad_norm": 2.0945446491241455, "learning_rate": 2.04517635995423e-06, "loss": 0.5639, "step": 10765 }, { "epoch": 0.7579021471312918, "grad_norm": 1.7555890083312988, "learning_rate": 2.04404903001755e-06, "loss": 0.6143, "step": 10766 }, { "epoch": 0.7579725448785639, "grad_norm": 1.7979004383087158, "learning_rate": 2.0429219577452495e-06, "loss": 0.7021, "step": 10767 }, { "epoch": 0.7580429426258359, "grad_norm": 2.3970093727111816, "learning_rate": 2.041795143195926e-06, "loss": 0.5943, "step": 10768 }, { "epoch": 0.7581133403731081, "grad_norm": 2.48932147026062, "learning_rate": 2.040668586428161e-06, "loss": 0.6879, "step": 10769 }, { "epoch": 0.7581837381203802, "grad_norm": 1.9941478967666626, "learning_rate": 2.03954228750053e-06, "loss": 0.6368, "step": 10770 }, { "epoch": 0.7582541358676522, "grad_norm": 1.8611884117126465, "learning_rate": 2.0384162464715857e-06, "loss": 0.6192, "step": 10771 }, { "epoch": 0.7583245336149244, "grad_norm": 1.9283246994018555, "learning_rate": 2.0372904633998764e-06, "loss": 0.7139, "step": 10772 }, { "epoch": 0.7583949313621964, "grad_norm": 1.8900132179260254, "learning_rate": 2.0361649383439307e-06, "loss": 0.6248, "step": 10773 }, { "epoch": 0.7584653291094685, "grad_norm": 1.9863580465316772, "learning_rate": 2.035039671362265e-06, "loss": 0.6984, "step": 10774 }, { "epoch": 0.7585357268567405, "grad_norm": 1.8568038940429688, "learning_rate": 2.0339146625133813e-06, "loss": 0.6972, "step": 10775 }, { "epoch": 0.7586061246040127, "grad_norm": 1.9941904544830322, "learning_rate": 2.0327899118557737e-06, "loss": 0.6792, "step": 10776 }, { "epoch": 0.7586765223512848, "grad_norm": 1.707916259765625, "learning_rate": 2.031665419447915e-06, "loss": 0.6567, "step": 10777 }, { "epoch": 0.7587469200985568, "grad_norm": 2.229140043258667, "learning_rate": 2.0305411853482734e-06, "loss": 0.64, "step": 10778 }, { "epoch": 0.758817317845829, "grad_norm": 1.7428779602050781, "learning_rate": 2.029417209615296e-06, "loss": 0.634, "step": 10779 }, { "epoch": 0.758887715593101, "grad_norm": 2.1376311779022217, "learning_rate": 2.028293492307419e-06, "loss": 0.7055, "step": 10780 }, { "epoch": 0.7589581133403731, "grad_norm": 2.1472020149230957, "learning_rate": 2.027170033483064e-06, "loss": 0.7346, "step": 10781 }, { "epoch": 0.7590285110876452, "grad_norm": 1.7491997480392456, "learning_rate": 2.0260468332006453e-06, "loss": 0.6617, "step": 10782 }, { "epoch": 0.7590989088349173, "grad_norm": 1.89515221118927, "learning_rate": 2.0249238915185534e-06, "loss": 0.6412, "step": 10783 }, { "epoch": 0.7591693065821894, "grad_norm": 1.5369503498077393, "learning_rate": 2.0238012084951773e-06, "loss": 0.5948, "step": 10784 }, { "epoch": 0.7592397043294614, "grad_norm": 3.8681869506835938, "learning_rate": 2.0226787841888822e-06, "loss": 0.7494, "step": 10785 }, { "epoch": 0.7593101020767336, "grad_norm": 1.9966708421707153, "learning_rate": 2.021556618658023e-06, "loss": 0.725, "step": 10786 }, { "epoch": 0.7593804998240057, "grad_norm": 1.804062843322754, "learning_rate": 2.020434711960946e-06, "loss": 0.552, "step": 10787 }, { "epoch": 0.7594508975712777, "grad_norm": 1.6824818849563599, "learning_rate": 2.0193130641559763e-06, "loss": 0.6128, "step": 10788 }, { "epoch": 0.7595212953185498, "grad_norm": 1.8883532285690308, "learning_rate": 2.0181916753014328e-06, "loss": 0.6567, "step": 10789 }, { "epoch": 0.7595916930658219, "grad_norm": 1.9700058698654175, "learning_rate": 2.017070545455617e-06, "loss": 0.6958, "step": 10790 }, { "epoch": 0.759662090813094, "grad_norm": 2.3070149421691895, "learning_rate": 2.015949674676815e-06, "loss": 0.5917, "step": 10791 }, { "epoch": 0.759732488560366, "grad_norm": 2.2768197059631348, "learning_rate": 2.0148290630233017e-06, "loss": 0.7377, "step": 10792 }, { "epoch": 0.7598028863076381, "grad_norm": 2.1575515270233154, "learning_rate": 2.013708710553343e-06, "loss": 0.7417, "step": 10793 }, { "epoch": 0.7598732840549103, "grad_norm": 1.894219994544983, "learning_rate": 2.0125886173251807e-06, "loss": 0.7097, "step": 10794 }, { "epoch": 0.7599436818021823, "grad_norm": 1.919672966003418, "learning_rate": 2.011468783397056e-06, "loss": 0.5777, "step": 10795 }, { "epoch": 0.7600140795494544, "grad_norm": 2.139437437057495, "learning_rate": 2.0103492088271876e-06, "loss": 0.6536, "step": 10796 }, { "epoch": 0.7600844772967265, "grad_norm": 1.7971724271774292, "learning_rate": 2.0092298936737818e-06, "loss": 0.7449, "step": 10797 }, { "epoch": 0.7601548750439986, "grad_norm": 1.9266345500946045, "learning_rate": 2.0081108379950318e-06, "loss": 0.6472, "step": 10798 }, { "epoch": 0.7602252727912707, "grad_norm": 1.9265652894973755, "learning_rate": 2.006992041849123e-06, "loss": 0.612, "step": 10799 }, { "epoch": 0.7602956705385427, "grad_norm": 2.06421160697937, "learning_rate": 2.0058735052942174e-06, "loss": 0.6047, "step": 10800 }, { "epoch": 0.7603660682858149, "grad_norm": 1.8102912902832031, "learning_rate": 2.004755228388473e-06, "loss": 0.5532, "step": 10801 }, { "epoch": 0.7604364660330869, "grad_norm": 1.7224584817886353, "learning_rate": 2.0036372111900265e-06, "loss": 0.589, "step": 10802 }, { "epoch": 0.760506863780359, "grad_norm": 2.140355348587036, "learning_rate": 2.0025194537570103e-06, "loss": 0.6236, "step": 10803 }, { "epoch": 0.7605772615276312, "grad_norm": 1.8907281160354614, "learning_rate": 2.0014019561475293e-06, "loss": 0.5382, "step": 10804 }, { "epoch": 0.7606476592749032, "grad_norm": 1.9855375289916992, "learning_rate": 2.0002847184196904e-06, "loss": 0.6788, "step": 10805 }, { "epoch": 0.7607180570221753, "grad_norm": 1.9366083145141602, "learning_rate": 1.999167740631574e-06, "loss": 0.531, "step": 10806 }, { "epoch": 0.7607884547694473, "grad_norm": 1.9710350036621094, "learning_rate": 1.998051022841259e-06, "loss": 0.638, "step": 10807 }, { "epoch": 0.7608588525167195, "grad_norm": 2.061950922012329, "learning_rate": 1.9969345651067984e-06, "loss": 0.6998, "step": 10808 }, { "epoch": 0.7609292502639915, "grad_norm": 2.579044818878174, "learning_rate": 1.9958183674862455e-06, "loss": 0.6448, "step": 10809 }, { "epoch": 0.7609996480112636, "grad_norm": 1.9797077178955078, "learning_rate": 1.994702430037623e-06, "loss": 0.7286, "step": 10810 }, { "epoch": 0.7610700457585358, "grad_norm": 1.6842553615570068, "learning_rate": 1.9935867528189573e-06, "loss": 0.5958, "step": 10811 }, { "epoch": 0.7611404435058078, "grad_norm": 2.0496912002563477, "learning_rate": 1.9924713358882473e-06, "loss": 0.6583, "step": 10812 }, { "epoch": 0.7612108412530799, "grad_norm": 2.1744179725646973, "learning_rate": 1.9913561793034903e-06, "loss": 0.6353, "step": 10813 }, { "epoch": 0.7612812390003519, "grad_norm": 2.3477537631988525, "learning_rate": 1.9902412831226592e-06, "loss": 0.6701, "step": 10814 }, { "epoch": 0.7613516367476241, "grad_norm": 1.8622901439666748, "learning_rate": 1.9891266474037255e-06, "loss": 0.6119, "step": 10815 }, { "epoch": 0.7614220344948962, "grad_norm": 1.9983611106872559, "learning_rate": 1.9880122722046308e-06, "loss": 0.7463, "step": 10816 }, { "epoch": 0.7614924322421682, "grad_norm": 1.7648141384124756, "learning_rate": 1.9868981575833194e-06, "loss": 0.5745, "step": 10817 }, { "epoch": 0.7615628299894404, "grad_norm": 2.958034038543701, "learning_rate": 1.9857843035977114e-06, "loss": 0.5753, "step": 10818 }, { "epoch": 0.7616332277367124, "grad_norm": 1.6243034601211548, "learning_rate": 1.9846707103057197e-06, "loss": 0.6263, "step": 10819 }, { "epoch": 0.7617036254839845, "grad_norm": 1.6666784286499023, "learning_rate": 1.9835573777652386e-06, "loss": 0.5989, "step": 10820 }, { "epoch": 0.7617740232312566, "grad_norm": 1.8677774667739868, "learning_rate": 1.9824443060341565e-06, "loss": 0.6542, "step": 10821 }, { "epoch": 0.7618444209785287, "grad_norm": 1.8209587335586548, "learning_rate": 1.981331495170335e-06, "loss": 0.5415, "step": 10822 }, { "epoch": 0.7619148187258008, "grad_norm": 2.085097551345825, "learning_rate": 1.9802189452316365e-06, "loss": 0.6121, "step": 10823 }, { "epoch": 0.7619852164730728, "grad_norm": 1.8581043481826782, "learning_rate": 1.9791066562758988e-06, "loss": 0.6977, "step": 10824 }, { "epoch": 0.762055614220345, "grad_norm": 2.061673164367676, "learning_rate": 1.9779946283609554e-06, "loss": 0.6176, "step": 10825 }, { "epoch": 0.7621260119676171, "grad_norm": 2.625190496444702, "learning_rate": 1.976882861544617e-06, "loss": 0.7646, "step": 10826 }, { "epoch": 0.7621964097148891, "grad_norm": 1.7892934083938599, "learning_rate": 1.975771355884693e-06, "loss": 0.5759, "step": 10827 }, { "epoch": 0.7622668074621612, "grad_norm": 1.8167368173599243, "learning_rate": 1.974660111438961e-06, "loss": 0.657, "step": 10828 }, { "epoch": 0.7623372052094333, "grad_norm": 1.8639053106307983, "learning_rate": 1.973549128265204e-06, "loss": 0.6923, "step": 10829 }, { "epoch": 0.7624076029567054, "grad_norm": 2.0939230918884277, "learning_rate": 1.9724384064211775e-06, "loss": 0.639, "step": 10830 }, { "epoch": 0.7624780007039774, "grad_norm": 1.8771915435791016, "learning_rate": 1.9713279459646343e-06, "loss": 0.6156, "step": 10831 }, { "epoch": 0.7625483984512496, "grad_norm": 2.05932354927063, "learning_rate": 1.9702177469533025e-06, "loss": 0.6389, "step": 10832 }, { "epoch": 0.7626187961985217, "grad_norm": 2.2691259384155273, "learning_rate": 1.9691078094449076e-06, "loss": 0.5835, "step": 10833 }, { "epoch": 0.7626891939457937, "grad_norm": 1.7528700828552246, "learning_rate": 1.967998133497154e-06, "loss": 0.6149, "step": 10834 }, { "epoch": 0.7627595916930658, "grad_norm": 1.714844822883606, "learning_rate": 1.9668887191677346e-06, "loss": 0.6434, "step": 10835 }, { "epoch": 0.7628299894403379, "grad_norm": 1.8516194820404053, "learning_rate": 1.9657795665143274e-06, "loss": 0.6767, "step": 10836 }, { "epoch": 0.76290038718761, "grad_norm": 1.8618766069412231, "learning_rate": 1.9646706755945994e-06, "loss": 0.5779, "step": 10837 }, { "epoch": 0.7629707849348821, "grad_norm": 2.072801351547241, "learning_rate": 1.963562046466205e-06, "loss": 0.6891, "step": 10838 }, { "epoch": 0.7630411826821542, "grad_norm": 2.0085902214050293, "learning_rate": 1.962453679186782e-06, "loss": 0.6035, "step": 10839 }, { "epoch": 0.7631115804294263, "grad_norm": 1.8698680400848389, "learning_rate": 1.9613455738139536e-06, "loss": 0.7211, "step": 10840 }, { "epoch": 0.7631819781766983, "grad_norm": 1.9360501766204834, "learning_rate": 1.96023773040533e-06, "loss": 0.6132, "step": 10841 }, { "epoch": 0.7632523759239704, "grad_norm": 1.9527859687805176, "learning_rate": 1.959130149018513e-06, "loss": 0.6447, "step": 10842 }, { "epoch": 0.7633227736712426, "grad_norm": 2.0749311447143555, "learning_rate": 1.9580228297110836e-06, "loss": 0.6568, "step": 10843 }, { "epoch": 0.7633931714185146, "grad_norm": 2.4530117511749268, "learning_rate": 1.9569157725406157e-06, "loss": 0.7114, "step": 10844 }, { "epoch": 0.7634635691657867, "grad_norm": 1.6684939861297607, "learning_rate": 1.955808977564663e-06, "loss": 0.6477, "step": 10845 }, { "epoch": 0.7635339669130587, "grad_norm": 1.917960286140442, "learning_rate": 1.9547024448407705e-06, "loss": 0.64, "step": 10846 }, { "epoch": 0.7636043646603309, "grad_norm": 1.7582372426986694, "learning_rate": 1.953596174426465e-06, "loss": 0.6514, "step": 10847 }, { "epoch": 0.7636747624076029, "grad_norm": 2.3220207691192627, "learning_rate": 1.952490166379267e-06, "loss": 0.7243, "step": 10848 }, { "epoch": 0.763745160154875, "grad_norm": 1.99081289768219, "learning_rate": 1.9513844207566743e-06, "loss": 0.6853, "step": 10849 }, { "epoch": 0.7638155579021472, "grad_norm": 2.2082183361053467, "learning_rate": 1.9502789376161804e-06, "loss": 0.5839, "step": 10850 }, { "epoch": 0.7638859556494192, "grad_norm": 1.570926547050476, "learning_rate": 1.949173717015257e-06, "loss": 0.6659, "step": 10851 }, { "epoch": 0.7639563533966913, "grad_norm": 1.9713387489318848, "learning_rate": 1.9480687590113667e-06, "loss": 0.5807, "step": 10852 }, { "epoch": 0.7640267511439633, "grad_norm": 1.6309221982955933, "learning_rate": 1.9469640636619545e-06, "loss": 0.6857, "step": 10853 }, { "epoch": 0.7640971488912355, "grad_norm": 1.512961983680725, "learning_rate": 1.9458596310244594e-06, "loss": 0.5362, "step": 10854 }, { "epoch": 0.7641675466385076, "grad_norm": 1.9155040979385376, "learning_rate": 1.944755461156297e-06, "loss": 0.7054, "step": 10855 }, { "epoch": 0.7642379443857796, "grad_norm": 1.8283376693725586, "learning_rate": 1.943651554114879e-06, "loss": 0.5709, "step": 10856 }, { "epoch": 0.7643083421330518, "grad_norm": 1.951034665107727, "learning_rate": 1.942547909957596e-06, "loss": 0.4867, "step": 10857 }, { "epoch": 0.7643787398803238, "grad_norm": 1.506604790687561, "learning_rate": 1.941444528741827e-06, "loss": 0.6714, "step": 10858 }, { "epoch": 0.7644491376275959, "grad_norm": 1.6766712665557861, "learning_rate": 1.9403414105249363e-06, "loss": 0.6631, "step": 10859 }, { "epoch": 0.764519535374868, "grad_norm": 2.0153839588165283, "learning_rate": 1.9392385553642807e-06, "loss": 0.5752, "step": 10860 }, { "epoch": 0.7645899331221401, "grad_norm": 1.9460481405258179, "learning_rate": 1.9381359633171933e-06, "loss": 0.6622, "step": 10861 }, { "epoch": 0.7646603308694122, "grad_norm": 2.0354034900665283, "learning_rate": 1.937033634441003e-06, "loss": 0.6527, "step": 10862 }, { "epoch": 0.7647307286166842, "grad_norm": 2.0402092933654785, "learning_rate": 1.9359315687930203e-06, "loss": 0.6557, "step": 10863 }, { "epoch": 0.7648011263639564, "grad_norm": 1.772151231765747, "learning_rate": 1.9348297664305408e-06, "loss": 0.6895, "step": 10864 }, { "epoch": 0.7648715241112285, "grad_norm": 2.132749080657959, "learning_rate": 1.9337282274108476e-06, "loss": 0.7401, "step": 10865 }, { "epoch": 0.7649419218585005, "grad_norm": 1.8612194061279297, "learning_rate": 1.9326269517912143e-06, "loss": 0.6294, "step": 10866 }, { "epoch": 0.7650123196057727, "grad_norm": 1.9259722232818604, "learning_rate": 1.9315259396288926e-06, "loss": 0.579, "step": 10867 }, { "epoch": 0.7650827173530447, "grad_norm": 2.053269863128662, "learning_rate": 1.9304251909811296e-06, "loss": 0.6214, "step": 10868 }, { "epoch": 0.7651531151003168, "grad_norm": 1.7813489437103271, "learning_rate": 1.9293247059051526e-06, "loss": 0.6879, "step": 10869 }, { "epoch": 0.7652235128475888, "grad_norm": 2.142317295074463, "learning_rate": 1.9282244844581757e-06, "loss": 0.6546, "step": 10870 }, { "epoch": 0.765293910594861, "grad_norm": 1.6138652563095093, "learning_rate": 1.9271245266973995e-06, "loss": 0.5844, "step": 10871 }, { "epoch": 0.7653643083421331, "grad_norm": 1.815242886543274, "learning_rate": 1.9260248326800156e-06, "loss": 0.7901, "step": 10872 }, { "epoch": 0.7654347060894051, "grad_norm": 1.7610828876495361, "learning_rate": 1.9249254024631936e-06, "loss": 0.6416, "step": 10873 }, { "epoch": 0.7655051038366772, "grad_norm": 1.7874455451965332, "learning_rate": 1.9238262361040986e-06, "loss": 0.733, "step": 10874 }, { "epoch": 0.7655755015839493, "grad_norm": 1.9892115592956543, "learning_rate": 1.9227273336598744e-06, "loss": 0.7512, "step": 10875 }, { "epoch": 0.7656458993312214, "grad_norm": 1.6593953371047974, "learning_rate": 1.9216286951876546e-06, "loss": 0.6017, "step": 10876 }, { "epoch": 0.7657162970784935, "grad_norm": 2.198329448699951, "learning_rate": 1.9205303207445563e-06, "loss": 0.6149, "step": 10877 }, { "epoch": 0.7657866948257656, "grad_norm": 1.9956153631210327, "learning_rate": 1.9194322103876886e-06, "loss": 0.6604, "step": 10878 }, { "epoch": 0.7658570925730377, "grad_norm": 2.555032730102539, "learning_rate": 1.9183343641741394e-06, "loss": 0.5831, "step": 10879 }, { "epoch": 0.7659274903203097, "grad_norm": 1.8207942247390747, "learning_rate": 1.9172367821609916e-06, "loss": 0.7169, "step": 10880 }, { "epoch": 0.7659978880675818, "grad_norm": 1.7070279121398926, "learning_rate": 1.916139464405306e-06, "loss": 0.6024, "step": 10881 }, { "epoch": 0.766068285814854, "grad_norm": 2.300766944885254, "learning_rate": 1.9150424109641333e-06, "loss": 0.6684, "step": 10882 }, { "epoch": 0.766138683562126, "grad_norm": 1.876111388206482, "learning_rate": 1.91394562189451e-06, "loss": 0.6562, "step": 10883 }, { "epoch": 0.7662090813093981, "grad_norm": 1.703229546546936, "learning_rate": 1.9128490972534617e-06, "loss": 0.6523, "step": 10884 }, { "epoch": 0.7662794790566702, "grad_norm": 2.0282440185546875, "learning_rate": 1.9117528370979936e-06, "loss": 0.7149, "step": 10885 }, { "epoch": 0.7663498768039423, "grad_norm": 1.8566932678222656, "learning_rate": 1.910656841485106e-06, "loss": 0.6327, "step": 10886 }, { "epoch": 0.7664202745512143, "grad_norm": 2.165825843811035, "learning_rate": 1.9095611104717788e-06, "loss": 0.7182, "step": 10887 }, { "epoch": 0.7664906722984864, "grad_norm": 1.8705730438232422, "learning_rate": 1.908465644114977e-06, "loss": 0.5856, "step": 10888 }, { "epoch": 0.7665610700457586, "grad_norm": 2.1930713653564453, "learning_rate": 1.90737044247166e-06, "loss": 0.7689, "step": 10889 }, { "epoch": 0.7666314677930306, "grad_norm": 1.9775259494781494, "learning_rate": 1.9062755055987632e-06, "loss": 0.7034, "step": 10890 }, { "epoch": 0.7667018655403027, "grad_norm": 1.88764488697052, "learning_rate": 1.905180833553219e-06, "loss": 0.6299, "step": 10891 }, { "epoch": 0.7667722632875748, "grad_norm": 1.8017656803131104, "learning_rate": 1.904086426391937e-06, "loss": 0.6329, "step": 10892 }, { "epoch": 0.7668426610348469, "grad_norm": 1.963010311126709, "learning_rate": 1.9029922841718166e-06, "loss": 0.7608, "step": 10893 }, { "epoch": 0.766913058782119, "grad_norm": 1.722939133644104, "learning_rate": 1.9018984069497415e-06, "loss": 0.585, "step": 10894 }, { "epoch": 0.766983456529391, "grad_norm": 1.884153127670288, "learning_rate": 1.9008047947825874e-06, "loss": 0.5385, "step": 10895 }, { "epoch": 0.7670538542766632, "grad_norm": 2.174384117126465, "learning_rate": 1.899711447727208e-06, "loss": 0.7878, "step": 10896 }, { "epoch": 0.7671242520239352, "grad_norm": 1.7687455415725708, "learning_rate": 1.8986183658404519e-06, "loss": 0.6768, "step": 10897 }, { "epoch": 0.7671946497712073, "grad_norm": 1.6976051330566406, "learning_rate": 1.8975255491791465e-06, "loss": 0.7771, "step": 10898 }, { "epoch": 0.7672650475184795, "grad_norm": 2.164806365966797, "learning_rate": 1.896432997800109e-06, "loss": 0.6698, "step": 10899 }, { "epoch": 0.7673354452657515, "grad_norm": 1.717596411705017, "learning_rate": 1.8953407117601401e-06, "loss": 0.6494, "step": 10900 }, { "epoch": 0.7674058430130236, "grad_norm": 2.1846981048583984, "learning_rate": 1.8942486911160328e-06, "loss": 0.6715, "step": 10901 }, { "epoch": 0.7674762407602956, "grad_norm": 1.9375718832015991, "learning_rate": 1.8931569359245584e-06, "loss": 0.6595, "step": 10902 }, { "epoch": 0.7675466385075678, "grad_norm": 2.1230781078338623, "learning_rate": 1.892065446242481e-06, "loss": 0.6517, "step": 10903 }, { "epoch": 0.7676170362548398, "grad_norm": 1.8957995176315308, "learning_rate": 1.890974222126548e-06, "loss": 0.655, "step": 10904 }, { "epoch": 0.7676874340021119, "grad_norm": 1.9107563495635986, "learning_rate": 1.8898832636334918e-06, "loss": 0.6284, "step": 10905 }, { "epoch": 0.7677578317493841, "grad_norm": 1.715765118598938, "learning_rate": 1.8887925708200306e-06, "loss": 0.5767, "step": 10906 }, { "epoch": 0.7678282294966561, "grad_norm": 2.4516358375549316, "learning_rate": 1.8877021437428752e-06, "loss": 0.6143, "step": 10907 }, { "epoch": 0.7678986272439282, "grad_norm": 2.193472146987915, "learning_rate": 1.8866119824587127e-06, "loss": 0.6454, "step": 10908 }, { "epoch": 0.7679690249912002, "grad_norm": 1.895568609237671, "learning_rate": 1.885522087024226e-06, "loss": 0.6085, "step": 10909 }, { "epoch": 0.7680394227384724, "grad_norm": 1.7502933740615845, "learning_rate": 1.8844324574960767e-06, "loss": 0.6933, "step": 10910 }, { "epoch": 0.7681098204857445, "grad_norm": 1.8721530437469482, "learning_rate": 1.8833430939309203e-06, "loss": 0.6475, "step": 10911 }, { "epoch": 0.7681802182330165, "grad_norm": 1.7968568801879883, "learning_rate": 1.8822539963853866e-06, "loss": 0.6553, "step": 10912 }, { "epoch": 0.7682506159802887, "grad_norm": 2.4510445594787598, "learning_rate": 1.8811651649161046e-06, "loss": 0.7212, "step": 10913 }, { "epoch": 0.7683210137275607, "grad_norm": 1.7534704208374023, "learning_rate": 1.880076599579679e-06, "loss": 0.5445, "step": 10914 }, { "epoch": 0.7683914114748328, "grad_norm": 1.8581663370132446, "learning_rate": 1.8789883004327106e-06, "loss": 0.6029, "step": 10915 }, { "epoch": 0.7684618092221049, "grad_norm": 2.13411808013916, "learning_rate": 1.877900267531776e-06, "loss": 0.7641, "step": 10916 }, { "epoch": 0.768532206969377, "grad_norm": 2.026169776916504, "learning_rate": 1.8768125009334492e-06, "loss": 0.6657, "step": 10917 }, { "epoch": 0.7686026047166491, "grad_norm": 2.0005686283111572, "learning_rate": 1.8757250006942766e-06, "loss": 0.5663, "step": 10918 }, { "epoch": 0.7686730024639211, "grad_norm": 2.4174747467041016, "learning_rate": 1.8746377668708037e-06, "loss": 0.7315, "step": 10919 }, { "epoch": 0.7687434002111933, "grad_norm": 1.8281002044677734, "learning_rate": 1.8735507995195536e-06, "loss": 0.6467, "step": 10920 }, { "epoch": 0.7688137979584654, "grad_norm": 1.9262804985046387, "learning_rate": 1.8724640986970417e-06, "loss": 0.6382, "step": 10921 }, { "epoch": 0.7688841957057374, "grad_norm": 1.853500247001648, "learning_rate": 1.871377664459763e-06, "loss": 0.6924, "step": 10922 }, { "epoch": 0.7689545934530095, "grad_norm": 1.6917481422424316, "learning_rate": 1.8702914968642088e-06, "loss": 0.7336, "step": 10923 }, { "epoch": 0.7690249912002816, "grad_norm": 1.9509743452072144, "learning_rate": 1.869205595966841e-06, "loss": 0.6231, "step": 10924 }, { "epoch": 0.7690953889475537, "grad_norm": 1.7021114826202393, "learning_rate": 1.868119961824123e-06, "loss": 0.5654, "step": 10925 }, { "epoch": 0.7691657866948257, "grad_norm": 2.0035266876220703, "learning_rate": 1.8670345944924946e-06, "loss": 0.6344, "step": 10926 }, { "epoch": 0.7692361844420978, "grad_norm": 1.7008312940597534, "learning_rate": 1.8659494940283876e-06, "loss": 0.741, "step": 10927 }, { "epoch": 0.76930658218937, "grad_norm": 1.8191039562225342, "learning_rate": 1.864864660488214e-06, "loss": 0.6582, "step": 10928 }, { "epoch": 0.769376979936642, "grad_norm": 1.8303632736206055, "learning_rate": 1.863780093928382e-06, "loss": 0.5956, "step": 10929 }, { "epoch": 0.7694473776839141, "grad_norm": 1.8405649662017822, "learning_rate": 1.86269579440527e-06, "loss": 0.7059, "step": 10930 }, { "epoch": 0.7695177754311862, "grad_norm": 1.9309446811676025, "learning_rate": 1.8616117619752588e-06, "loss": 0.6094, "step": 10931 }, { "epoch": 0.7695881731784583, "grad_norm": 1.6489958763122559, "learning_rate": 1.860527996694703e-06, "loss": 0.618, "step": 10932 }, { "epoch": 0.7696585709257304, "grad_norm": 2.1765079498291016, "learning_rate": 1.8594444986199534e-06, "loss": 0.6673, "step": 10933 }, { "epoch": 0.7697289686730024, "grad_norm": 2.3700578212738037, "learning_rate": 1.858361267807338e-06, "loss": 0.6396, "step": 10934 }, { "epoch": 0.7697993664202746, "grad_norm": 1.6630127429962158, "learning_rate": 1.8572783043131816e-06, "loss": 0.4975, "step": 10935 }, { "epoch": 0.7698697641675466, "grad_norm": 2.0298945903778076, "learning_rate": 1.8561956081937788e-06, "loss": 0.6333, "step": 10936 }, { "epoch": 0.7699401619148187, "grad_norm": 1.7896430492401123, "learning_rate": 1.8551131795054244e-06, "loss": 0.5332, "step": 10937 }, { "epoch": 0.7700105596620909, "grad_norm": 1.6912990808486938, "learning_rate": 1.854031018304398e-06, "loss": 0.6503, "step": 10938 }, { "epoch": 0.7700809574093629, "grad_norm": 1.779786467552185, "learning_rate": 1.852949124646957e-06, "loss": 0.6829, "step": 10939 }, { "epoch": 0.770151355156635, "grad_norm": 1.8129569292068481, "learning_rate": 1.851867498589355e-06, "loss": 0.7119, "step": 10940 }, { "epoch": 0.770221752903907, "grad_norm": 2.1733458042144775, "learning_rate": 1.8507861401878239e-06, "loss": 0.677, "step": 10941 }, { "epoch": 0.7702921506511792, "grad_norm": 2.070523977279663, "learning_rate": 1.8497050494985848e-06, "loss": 0.6494, "step": 10942 }, { "epoch": 0.7703625483984512, "grad_norm": 1.9014041423797607, "learning_rate": 1.8486242265778427e-06, "loss": 0.6024, "step": 10943 }, { "epoch": 0.7704329461457233, "grad_norm": 1.9929664134979248, "learning_rate": 1.847543671481794e-06, "loss": 0.7296, "step": 10944 }, { "epoch": 0.7705033438929955, "grad_norm": 1.8248388767242432, "learning_rate": 1.8464633842666148e-06, "loss": 0.6451, "step": 10945 }, { "epoch": 0.7705737416402675, "grad_norm": 1.9449684619903564, "learning_rate": 1.8453833649884728e-06, "loss": 0.6758, "step": 10946 }, { "epoch": 0.7706441393875396, "grad_norm": 2.6143882274627686, "learning_rate": 1.8443036137035188e-06, "loss": 0.6623, "step": 10947 }, { "epoch": 0.7707145371348116, "grad_norm": 2.3402364253997803, "learning_rate": 1.8432241304678886e-06, "loss": 0.688, "step": 10948 }, { "epoch": 0.7707849348820838, "grad_norm": 1.9217042922973633, "learning_rate": 1.842144915337704e-06, "loss": 0.7133, "step": 10949 }, { "epoch": 0.7708553326293559, "grad_norm": 1.85403311252594, "learning_rate": 1.8410659683690788e-06, "loss": 0.622, "step": 10950 }, { "epoch": 0.7709257303766279, "grad_norm": 2.3979179859161377, "learning_rate": 1.8399872896181042e-06, "loss": 0.6194, "step": 10951 }, { "epoch": 0.7709961281239001, "grad_norm": 1.7140896320343018, "learning_rate": 1.838908879140865e-06, "loss": 0.7445, "step": 10952 }, { "epoch": 0.7710665258711721, "grad_norm": 2.0895164012908936, "learning_rate": 1.8378307369934283e-06, "loss": 0.6349, "step": 10953 }, { "epoch": 0.7711369236184442, "grad_norm": 2.5773916244506836, "learning_rate": 1.8367528632318462e-06, "loss": 0.6175, "step": 10954 }, { "epoch": 0.7712073213657163, "grad_norm": 1.74588143825531, "learning_rate": 1.8356752579121564e-06, "loss": 0.5878, "step": 10955 }, { "epoch": 0.7712777191129884, "grad_norm": 1.9921084642410278, "learning_rate": 1.8345979210903898e-06, "loss": 0.6253, "step": 10956 }, { "epoch": 0.7713481168602605, "grad_norm": 1.9225609302520752, "learning_rate": 1.8335208528225534e-06, "loss": 0.6626, "step": 10957 }, { "epoch": 0.7714185146075325, "grad_norm": 3.5117552280426025, "learning_rate": 1.8324440531646484e-06, "loss": 0.6677, "step": 10958 }, { "epoch": 0.7714889123548047, "grad_norm": 1.7947590351104736, "learning_rate": 1.8313675221726574e-06, "loss": 0.6492, "step": 10959 }, { "epoch": 0.7715593101020767, "grad_norm": 1.706742525100708, "learning_rate": 1.8302912599025492e-06, "loss": 0.6197, "step": 10960 }, { "epoch": 0.7716297078493488, "grad_norm": 1.918839931488037, "learning_rate": 1.8292152664102787e-06, "loss": 0.6348, "step": 10961 }, { "epoch": 0.7717001055966209, "grad_norm": 1.9730713367462158, "learning_rate": 1.8281395417517916e-06, "loss": 0.6292, "step": 10962 }, { "epoch": 0.771770503343893, "grad_norm": 1.7873406410217285, "learning_rate": 1.8270640859830123e-06, "loss": 0.6391, "step": 10963 }, { "epoch": 0.7718409010911651, "grad_norm": 1.8078564405441284, "learning_rate": 1.8259888991598568e-06, "loss": 0.5736, "step": 10964 }, { "epoch": 0.7719112988384371, "grad_norm": 2.1115171909332275, "learning_rate": 1.8249139813382254e-06, "loss": 0.6998, "step": 10965 }, { "epoch": 0.7719816965857093, "grad_norm": 2.4990203380584717, "learning_rate": 1.8238393325740026e-06, "loss": 0.6319, "step": 10966 }, { "epoch": 0.7720520943329814, "grad_norm": 1.9439697265625, "learning_rate": 1.8227649529230582e-06, "loss": 0.5961, "step": 10967 }, { "epoch": 0.7721224920802534, "grad_norm": 2.3151116371154785, "learning_rate": 1.8216908424412556e-06, "loss": 0.6392, "step": 10968 }, { "epoch": 0.7721928898275255, "grad_norm": 1.686690330505371, "learning_rate": 1.8206170011844327e-06, "loss": 0.5782, "step": 10969 }, { "epoch": 0.7722632875747976, "grad_norm": 1.5983541011810303, "learning_rate": 1.8195434292084252e-06, "loss": 0.6726, "step": 10970 }, { "epoch": 0.7723336853220697, "grad_norm": 2.0497686862945557, "learning_rate": 1.8184701265690468e-06, "loss": 0.7441, "step": 10971 }, { "epoch": 0.7724040830693418, "grad_norm": 1.9763208627700806, "learning_rate": 1.8173970933220988e-06, "loss": 0.5735, "step": 10972 }, { "epoch": 0.7724744808166139, "grad_norm": 1.6335068941116333, "learning_rate": 1.8163243295233678e-06, "loss": 0.711, "step": 10973 }, { "epoch": 0.772544878563886, "grad_norm": 2.3328099250793457, "learning_rate": 1.8152518352286312e-06, "loss": 0.6749, "step": 10974 }, { "epoch": 0.772615276311158, "grad_norm": 1.8194034099578857, "learning_rate": 1.8141796104936454e-06, "loss": 0.5977, "step": 10975 }, { "epoch": 0.7726856740584301, "grad_norm": 1.787309169769287, "learning_rate": 1.813107655374161e-06, "loss": 0.623, "step": 10976 }, { "epoch": 0.7727560718057023, "grad_norm": 1.773770809173584, "learning_rate": 1.8120359699259062e-06, "loss": 0.6557, "step": 10977 }, { "epoch": 0.7728264695529743, "grad_norm": 1.7153410911560059, "learning_rate": 1.8109645542045996e-06, "loss": 0.679, "step": 10978 }, { "epoch": 0.7728968673002464, "grad_norm": 1.954724907875061, "learning_rate": 1.8098934082659445e-06, "loss": 0.6976, "step": 10979 }, { "epoch": 0.7729672650475184, "grad_norm": 1.7913336753845215, "learning_rate": 1.8088225321656324e-06, "loss": 0.6446, "step": 10980 }, { "epoch": 0.7730376627947906, "grad_norm": 1.9435333013534546, "learning_rate": 1.8077519259593369e-06, "loss": 0.6504, "step": 10981 }, { "epoch": 0.7731080605420626, "grad_norm": 1.9646354913711548, "learning_rate": 1.806681589702723e-06, "loss": 0.5554, "step": 10982 }, { "epoch": 0.7731784582893347, "grad_norm": 1.7055813074111938, "learning_rate": 1.8056115234514358e-06, "loss": 0.6614, "step": 10983 }, { "epoch": 0.7732488560366069, "grad_norm": 2.268397331237793, "learning_rate": 1.8045417272611107e-06, "loss": 0.6145, "step": 10984 }, { "epoch": 0.7733192537838789, "grad_norm": 2.0758512020111084, "learning_rate": 1.8034722011873641e-06, "loss": 0.6764, "step": 10985 }, { "epoch": 0.773389651531151, "grad_norm": 1.9915462732315063, "learning_rate": 1.8024029452858066e-06, "loss": 0.7042, "step": 10986 }, { "epoch": 0.773460049278423, "grad_norm": 1.839612364768982, "learning_rate": 1.8013339596120244e-06, "loss": 0.5654, "step": 10987 }, { "epoch": 0.7735304470256952, "grad_norm": 3.047257661819458, "learning_rate": 1.8002652442216006e-06, "loss": 0.6339, "step": 10988 }, { "epoch": 0.7736008447729673, "grad_norm": 2.1048054695129395, "learning_rate": 1.7991967991700959e-06, "loss": 0.6484, "step": 10989 }, { "epoch": 0.7736712425202393, "grad_norm": 1.809146523475647, "learning_rate": 1.7981286245130568e-06, "loss": 0.5585, "step": 10990 }, { "epoch": 0.7737416402675115, "grad_norm": 1.606680154800415, "learning_rate": 1.7970607203060243e-06, "loss": 0.564, "step": 10991 }, { "epoch": 0.7738120380147835, "grad_norm": 2.022623300552368, "learning_rate": 1.7959930866045149e-06, "loss": 0.5577, "step": 10992 }, { "epoch": 0.7738824357620556, "grad_norm": 1.6203343868255615, "learning_rate": 1.79492572346404e-06, "loss": 0.6333, "step": 10993 }, { "epoch": 0.7739528335093278, "grad_norm": 2.0182344913482666, "learning_rate": 1.7938586309400905e-06, "loss": 0.6851, "step": 10994 }, { "epoch": 0.7740232312565998, "grad_norm": 1.7931199073791504, "learning_rate": 1.792791809088146e-06, "loss": 0.5804, "step": 10995 }, { "epoch": 0.7740936290038719, "grad_norm": 2.1684651374816895, "learning_rate": 1.7917252579636691e-06, "loss": 0.7803, "step": 10996 }, { "epoch": 0.7741640267511439, "grad_norm": 1.7898391485214233, "learning_rate": 1.7906589776221154e-06, "loss": 0.678, "step": 10997 }, { "epoch": 0.7742344244984161, "grad_norm": 2.2383217811584473, "learning_rate": 1.7895929681189167e-06, "loss": 0.5998, "step": 10998 }, { "epoch": 0.7743048222456881, "grad_norm": 1.791656494140625, "learning_rate": 1.7885272295095006e-06, "loss": 0.4995, "step": 10999 }, { "epoch": 0.7743752199929602, "grad_norm": 1.7421952486038208, "learning_rate": 1.787461761849274e-06, "loss": 0.6728, "step": 11000 }, { "epoch": 0.7744456177402324, "grad_norm": 2.089238405227661, "learning_rate": 1.7863965651936303e-06, "loss": 0.6741, "step": 11001 }, { "epoch": 0.7745160154875044, "grad_norm": 1.776188850402832, "learning_rate": 1.7853316395979495e-06, "loss": 0.7807, "step": 11002 }, { "epoch": 0.7745864132347765, "grad_norm": 1.5738986730575562, "learning_rate": 1.7842669851176014e-06, "loss": 0.6167, "step": 11003 }, { "epoch": 0.7746568109820485, "grad_norm": 1.8955340385437012, "learning_rate": 1.7832026018079343e-06, "loss": 0.6394, "step": 11004 }, { "epoch": 0.7747272087293207, "grad_norm": 2.0665059089660645, "learning_rate": 1.7821384897242903e-06, "loss": 0.5508, "step": 11005 }, { "epoch": 0.7747976064765928, "grad_norm": 2.4398193359375, "learning_rate": 1.7810746489219927e-06, "loss": 0.749, "step": 11006 }, { "epoch": 0.7748680042238648, "grad_norm": 1.9993984699249268, "learning_rate": 1.7800110794563498e-06, "loss": 0.5938, "step": 11007 }, { "epoch": 0.774938401971137, "grad_norm": 1.561629056930542, "learning_rate": 1.7789477813826565e-06, "loss": 0.622, "step": 11008 }, { "epoch": 0.775008799718409, "grad_norm": 2.344005823135376, "learning_rate": 1.777884754756199e-06, "loss": 0.7067, "step": 11009 }, { "epoch": 0.7750791974656811, "grad_norm": 2.2331273555755615, "learning_rate": 1.7768219996322395e-06, "loss": 0.7552, "step": 11010 }, { "epoch": 0.7751495952129532, "grad_norm": 1.8949662446975708, "learning_rate": 1.7757595160660376e-06, "loss": 0.5932, "step": 11011 }, { "epoch": 0.7752199929602253, "grad_norm": 1.903679370880127, "learning_rate": 1.7746973041128298e-06, "loss": 0.6233, "step": 11012 }, { "epoch": 0.7752903907074974, "grad_norm": 1.7019728422164917, "learning_rate": 1.7736353638278406e-06, "loss": 0.5626, "step": 11013 }, { "epoch": 0.7753607884547694, "grad_norm": 1.798667073249817, "learning_rate": 1.7725736952662809e-06, "loss": 0.6641, "step": 11014 }, { "epoch": 0.7754311862020415, "grad_norm": 2.9549643993377686, "learning_rate": 1.7715122984833511e-06, "loss": 0.734, "step": 11015 }, { "epoch": 0.7755015839493136, "grad_norm": 1.5921626091003418, "learning_rate": 1.7704511735342294e-06, "loss": 0.7424, "step": 11016 }, { "epoch": 0.7755719816965857, "grad_norm": 2.0752453804016113, "learning_rate": 1.7693903204740894e-06, "loss": 0.5622, "step": 11017 }, { "epoch": 0.7756423794438578, "grad_norm": 1.5865192413330078, "learning_rate": 1.7683297393580826e-06, "loss": 0.6085, "step": 11018 }, { "epoch": 0.7757127771911299, "grad_norm": 1.8185125589370728, "learning_rate": 1.7672694302413536e-06, "loss": 0.6853, "step": 11019 }, { "epoch": 0.775783174938402, "grad_norm": 1.6539126634597778, "learning_rate": 1.7662093931790232e-06, "loss": 0.5613, "step": 11020 }, { "epoch": 0.775853572685674, "grad_norm": 1.8688111305236816, "learning_rate": 1.7651496282262079e-06, "loss": 0.5588, "step": 11021 }, { "epoch": 0.7759239704329461, "grad_norm": 2.8568503856658936, "learning_rate": 1.7640901354380019e-06, "loss": 0.5551, "step": 11022 }, { "epoch": 0.7759943681802183, "grad_norm": 1.5932812690734863, "learning_rate": 1.7630309148694946e-06, "loss": 0.5841, "step": 11023 }, { "epoch": 0.7760647659274903, "grad_norm": 2.0643835067749023, "learning_rate": 1.7619719665757499e-06, "loss": 0.7426, "step": 11024 }, { "epoch": 0.7761351636747624, "grad_norm": 2.6580164432525635, "learning_rate": 1.760913290611831e-06, "loss": 0.6856, "step": 11025 }, { "epoch": 0.7762055614220345, "grad_norm": 2.4521689414978027, "learning_rate": 1.7598548870327713e-06, "loss": 0.6783, "step": 11026 }, { "epoch": 0.7762759591693066, "grad_norm": 1.7272828817367554, "learning_rate": 1.7587967558936034e-06, "loss": 0.6545, "step": 11027 }, { "epoch": 0.7763463569165787, "grad_norm": 1.7343230247497559, "learning_rate": 1.7577388972493363e-06, "loss": 0.6395, "step": 11028 }, { "epoch": 0.7764167546638507, "grad_norm": 1.8594332933425903, "learning_rate": 1.7566813111549748e-06, "loss": 0.7705, "step": 11029 }, { "epoch": 0.7764871524111229, "grad_norm": 2.1575520038604736, "learning_rate": 1.7556239976654976e-06, "loss": 0.6065, "step": 11030 }, { "epoch": 0.7765575501583949, "grad_norm": 1.8409764766693115, "learning_rate": 1.7545669568358825e-06, "loss": 0.6343, "step": 11031 }, { "epoch": 0.776627947905667, "grad_norm": 2.4617669582366943, "learning_rate": 1.7535101887210784e-06, "loss": 0.6205, "step": 11032 }, { "epoch": 0.7766983456529392, "grad_norm": 1.5973542928695679, "learning_rate": 1.7524536933760327e-06, "loss": 0.6165, "step": 11033 }, { "epoch": 0.7767687434002112, "grad_norm": 1.836305856704712, "learning_rate": 1.75139747085567e-06, "loss": 0.6205, "step": 11034 }, { "epoch": 0.7768391411474833, "grad_norm": 1.6932168006896973, "learning_rate": 1.7503415212149077e-06, "loss": 0.6653, "step": 11035 }, { "epoch": 0.7769095388947553, "grad_norm": 1.6556180715560913, "learning_rate": 1.7492858445086429e-06, "loss": 0.6805, "step": 11036 }, { "epoch": 0.7769799366420275, "grad_norm": 2.1157031059265137, "learning_rate": 1.7482304407917663e-06, "loss": 0.6376, "step": 11037 }, { "epoch": 0.7770503343892995, "grad_norm": 2.147099733352661, "learning_rate": 1.7471753101191409e-06, "loss": 0.648, "step": 11038 }, { "epoch": 0.7771207321365716, "grad_norm": 1.5262255668640137, "learning_rate": 1.7461204525456285e-06, "loss": 0.7772, "step": 11039 }, { "epoch": 0.7771911298838438, "grad_norm": 1.8037056922912598, "learning_rate": 1.745065868126074e-06, "loss": 0.4989, "step": 11040 }, { "epoch": 0.7772615276311158, "grad_norm": 1.5460970401763916, "learning_rate": 1.7440115569153025e-06, "loss": 0.5623, "step": 11041 }, { "epoch": 0.7773319253783879, "grad_norm": 1.7980389595031738, "learning_rate": 1.7429575189681315e-06, "loss": 0.6013, "step": 11042 }, { "epoch": 0.7774023231256599, "grad_norm": 1.618548035621643, "learning_rate": 1.7419037543393606e-06, "loss": 0.6837, "step": 11043 }, { "epoch": 0.7774727208729321, "grad_norm": 1.6543536186218262, "learning_rate": 1.7408502630837756e-06, "loss": 0.6242, "step": 11044 }, { "epoch": 0.7775431186202042, "grad_norm": 2.241278648376465, "learning_rate": 1.7397970452561464e-06, "loss": 0.681, "step": 11045 }, { "epoch": 0.7776135163674762, "grad_norm": 1.9618085622787476, "learning_rate": 1.7387441009112345e-06, "loss": 0.765, "step": 11046 }, { "epoch": 0.7776839141147484, "grad_norm": 1.665527582168579, "learning_rate": 1.7376914301037797e-06, "loss": 0.5803, "step": 11047 }, { "epoch": 0.7777543118620204, "grad_norm": 1.928641438484192, "learning_rate": 1.7366390328885157e-06, "loss": 0.7723, "step": 11048 }, { "epoch": 0.7778247096092925, "grad_norm": 1.7774659395217896, "learning_rate": 1.735586909320155e-06, "loss": 0.6022, "step": 11049 }, { "epoch": 0.7778951073565646, "grad_norm": 1.8710529804229736, "learning_rate": 1.7345350594533982e-06, "loss": 0.6166, "step": 11050 }, { "epoch": 0.7779655051038367, "grad_norm": 2.0416781902313232, "learning_rate": 1.7334834833429312e-06, "loss": 0.5424, "step": 11051 }, { "epoch": 0.7780359028511088, "grad_norm": 2.0039896965026855, "learning_rate": 1.7324321810434287e-06, "loss": 0.6707, "step": 11052 }, { "epoch": 0.7781063005983808, "grad_norm": 1.9812105894088745, "learning_rate": 1.7313811526095455e-06, "loss": 0.7653, "step": 11053 }, { "epoch": 0.778176698345653, "grad_norm": 2.2778193950653076, "learning_rate": 1.7303303980959308e-06, "loss": 0.6465, "step": 11054 }, { "epoch": 0.778247096092925, "grad_norm": 1.5538355112075806, "learning_rate": 1.7292799175572105e-06, "loss": 0.5868, "step": 11055 }, { "epoch": 0.7783174938401971, "grad_norm": 1.6901787519454956, "learning_rate": 1.7282297110480006e-06, "loss": 0.7755, "step": 11056 }, { "epoch": 0.7783878915874692, "grad_norm": 2.0541203022003174, "learning_rate": 1.727179778622901e-06, "loss": 0.6781, "step": 11057 }, { "epoch": 0.7784582893347413, "grad_norm": 2.0761497020721436, "learning_rate": 1.7261301203365013e-06, "loss": 0.6503, "step": 11058 }, { "epoch": 0.7785286870820134, "grad_norm": 1.754136085510254, "learning_rate": 1.7250807362433714e-06, "loss": 0.7743, "step": 11059 }, { "epoch": 0.7785990848292854, "grad_norm": 1.7441813945770264, "learning_rate": 1.724031626398073e-06, "loss": 0.5532, "step": 11060 }, { "epoch": 0.7786694825765575, "grad_norm": 2.2677829265594482, "learning_rate": 1.7229827908551491e-06, "loss": 0.7432, "step": 11061 }, { "epoch": 0.7787398803238297, "grad_norm": 2.3153162002563477, "learning_rate": 1.7219342296691289e-06, "loss": 0.6672, "step": 11062 }, { "epoch": 0.7788102780711017, "grad_norm": 2.1410880088806152, "learning_rate": 1.7208859428945254e-06, "loss": 0.7027, "step": 11063 }, { "epoch": 0.7788806758183738, "grad_norm": 2.072766065597534, "learning_rate": 1.7198379305858457e-06, "loss": 0.6491, "step": 11064 }, { "epoch": 0.7789510735656459, "grad_norm": 1.9863256216049194, "learning_rate": 1.718790192797572e-06, "loss": 0.6977, "step": 11065 }, { "epoch": 0.779021471312918, "grad_norm": 2.2018282413482666, "learning_rate": 1.7177427295841802e-06, "loss": 0.6823, "step": 11066 }, { "epoch": 0.7790918690601901, "grad_norm": 1.9191722869873047, "learning_rate": 1.7166955410001288e-06, "loss": 0.6698, "step": 11067 }, { "epoch": 0.7791622668074621, "grad_norm": 2.0711610317230225, "learning_rate": 1.7156486270998601e-06, "loss": 0.6887, "step": 11068 }, { "epoch": 0.7792326645547343, "grad_norm": 1.8579130172729492, "learning_rate": 1.7146019879378037e-06, "loss": 0.5965, "step": 11069 }, { "epoch": 0.7793030623020063, "grad_norm": 2.711268186569214, "learning_rate": 1.7135556235683784e-06, "loss": 0.6488, "step": 11070 }, { "epoch": 0.7793734600492784, "grad_norm": 2.123586893081665, "learning_rate": 1.7125095340459822e-06, "loss": 0.6378, "step": 11071 }, { "epoch": 0.7794438577965506, "grad_norm": 2.3076212406158447, "learning_rate": 1.7114637194250062e-06, "loss": 0.6702, "step": 11072 }, { "epoch": 0.7795142555438226, "grad_norm": 2.1527416706085205, "learning_rate": 1.7104181797598207e-06, "loss": 0.6417, "step": 11073 }, { "epoch": 0.7795846532910947, "grad_norm": 1.844515323638916, "learning_rate": 1.7093729151047845e-06, "loss": 0.7044, "step": 11074 }, { "epoch": 0.7796550510383667, "grad_norm": 2.63543438911438, "learning_rate": 1.7083279255142396e-06, "loss": 0.7058, "step": 11075 }, { "epoch": 0.7797254487856389, "grad_norm": 2.2445428371429443, "learning_rate": 1.7072832110425206e-06, "loss": 0.686, "step": 11076 }, { "epoch": 0.7797958465329109, "grad_norm": 1.852050542831421, "learning_rate": 1.7062387717439393e-06, "loss": 0.5033, "step": 11077 }, { "epoch": 0.779866244280183, "grad_norm": 2.14237904548645, "learning_rate": 1.7051946076728002e-06, "loss": 0.5864, "step": 11078 }, { "epoch": 0.7799366420274552, "grad_norm": 1.9742411375045776, "learning_rate": 1.7041507188833883e-06, "loss": 0.5752, "step": 11079 }, { "epoch": 0.7800070397747272, "grad_norm": 1.788723349571228, "learning_rate": 1.703107105429977e-06, "loss": 0.7361, "step": 11080 }, { "epoch": 0.7800774375219993, "grad_norm": 1.880996823310852, "learning_rate": 1.7020637673668226e-06, "loss": 0.659, "step": 11081 }, { "epoch": 0.7801478352692713, "grad_norm": 1.9866633415222168, "learning_rate": 1.7010207047481728e-06, "loss": 0.7008, "step": 11082 }, { "epoch": 0.7802182330165435, "grad_norm": 1.8979167938232422, "learning_rate": 1.6999779176282542e-06, "loss": 0.5993, "step": 11083 }, { "epoch": 0.7802886307638156, "grad_norm": 2.4394445419311523, "learning_rate": 1.6989354060612848e-06, "loss": 0.6431, "step": 11084 }, { "epoch": 0.7803590285110876, "grad_norm": 2.275749921798706, "learning_rate": 1.6978931701014653e-06, "loss": 0.6232, "step": 11085 }, { "epoch": 0.7804294262583598, "grad_norm": 1.6965659856796265, "learning_rate": 1.696851209802981e-06, "loss": 0.6827, "step": 11086 }, { "epoch": 0.7804998240056318, "grad_norm": 2.420571804046631, "learning_rate": 1.695809525220004e-06, "loss": 0.605, "step": 11087 }, { "epoch": 0.7805702217529039, "grad_norm": 2.053755521774292, "learning_rate": 1.6947681164066925e-06, "loss": 0.5517, "step": 11088 }, { "epoch": 0.780640619500176, "grad_norm": 2.175386428833008, "learning_rate": 1.6937269834171941e-06, "loss": 0.8115, "step": 11089 }, { "epoch": 0.7807110172474481, "grad_norm": 1.9240583181381226, "learning_rate": 1.6926861263056355e-06, "loss": 0.6026, "step": 11090 }, { "epoch": 0.7807814149947202, "grad_norm": 1.8561307191848755, "learning_rate": 1.6916455451261314e-06, "loss": 0.6714, "step": 11091 }, { "epoch": 0.7808518127419922, "grad_norm": 1.793487548828125, "learning_rate": 1.6906052399327809e-06, "loss": 0.7356, "step": 11092 }, { "epoch": 0.7809222104892644, "grad_norm": 1.8967546224594116, "learning_rate": 1.6895652107796749e-06, "loss": 0.6165, "step": 11093 }, { "epoch": 0.7809926082365364, "grad_norm": 2.002232789993286, "learning_rate": 1.6885254577208807e-06, "loss": 0.6295, "step": 11094 }, { "epoch": 0.7810630059838085, "grad_norm": 2.0695247650146484, "learning_rate": 1.68748598081046e-06, "loss": 0.6036, "step": 11095 }, { "epoch": 0.7811334037310806, "grad_norm": 2.079068183898926, "learning_rate": 1.6864467801024553e-06, "loss": 0.6376, "step": 11096 }, { "epoch": 0.7812038014783527, "grad_norm": 1.9811755418777466, "learning_rate": 1.6854078556508938e-06, "loss": 0.717, "step": 11097 }, { "epoch": 0.7812741992256248, "grad_norm": 2.3885338306427, "learning_rate": 1.6843692075097899e-06, "loss": 0.7443, "step": 11098 }, { "epoch": 0.7813445969728968, "grad_norm": 1.8399784564971924, "learning_rate": 1.6833308357331464e-06, "loss": 0.5969, "step": 11099 }, { "epoch": 0.781414994720169, "grad_norm": 2.281334161758423, "learning_rate": 1.682292740374946e-06, "loss": 0.5836, "step": 11100 }, { "epoch": 0.7814853924674411, "grad_norm": 2.1016016006469727, "learning_rate": 1.681254921489164e-06, "loss": 0.6082, "step": 11101 }, { "epoch": 0.7815557902147131, "grad_norm": 1.9106851816177368, "learning_rate": 1.6802173791297558e-06, "loss": 0.5808, "step": 11102 }, { "epoch": 0.7816261879619852, "grad_norm": 1.7404488325119019, "learning_rate": 1.6791801133506633e-06, "loss": 0.6207, "step": 11103 }, { "epoch": 0.7816965857092573, "grad_norm": 2.066589117050171, "learning_rate": 1.6781431242058146e-06, "loss": 0.6795, "step": 11104 }, { "epoch": 0.7817669834565294, "grad_norm": 2.135134220123291, "learning_rate": 1.677106411749126e-06, "loss": 0.7128, "step": 11105 }, { "epoch": 0.7818373812038015, "grad_norm": 1.6378732919692993, "learning_rate": 1.6760699760344938e-06, "loss": 0.6492, "step": 11106 }, { "epoch": 0.7819077789510736, "grad_norm": 1.8816508054733276, "learning_rate": 1.6750338171158068e-06, "loss": 0.7285, "step": 11107 }, { "epoch": 0.7819781766983457, "grad_norm": 1.9181289672851562, "learning_rate": 1.6739979350469345e-06, "loss": 0.7023, "step": 11108 }, { "epoch": 0.7820485744456177, "grad_norm": 1.6321521997451782, "learning_rate": 1.672962329881733e-06, "loss": 0.6461, "step": 11109 }, { "epoch": 0.7821189721928898, "grad_norm": 2.2805120944976807, "learning_rate": 1.671927001674043e-06, "loss": 0.8006, "step": 11110 }, { "epoch": 0.7821893699401619, "grad_norm": 1.7693395614624023, "learning_rate": 1.6708919504776946e-06, "loss": 0.6379, "step": 11111 }, { "epoch": 0.782259767687434, "grad_norm": 2.0257151126861572, "learning_rate": 1.6698571763464985e-06, "loss": 0.656, "step": 11112 }, { "epoch": 0.7823301654347061, "grad_norm": 2.177809000015259, "learning_rate": 1.6688226793342578e-06, "loss": 0.6922, "step": 11113 }, { "epoch": 0.7824005631819781, "grad_norm": 1.7162494659423828, "learning_rate": 1.6677884594947533e-06, "loss": 0.7239, "step": 11114 }, { "epoch": 0.7824709609292503, "grad_norm": 1.77838134765625, "learning_rate": 1.6667545168817558e-06, "loss": 0.6409, "step": 11115 }, { "epoch": 0.7825413586765223, "grad_norm": 1.9595921039581299, "learning_rate": 1.6657208515490194e-06, "loss": 0.595, "step": 11116 }, { "epoch": 0.7826117564237944, "grad_norm": 1.9215511083602905, "learning_rate": 1.6646874635502892e-06, "loss": 0.5606, "step": 11117 }, { "epoch": 0.7826821541710666, "grad_norm": 1.6792012453079224, "learning_rate": 1.6636543529392876e-06, "loss": 0.64, "step": 11118 }, { "epoch": 0.7827525519183386, "grad_norm": 1.9724340438842773, "learning_rate": 1.662621519769731e-06, "loss": 0.6531, "step": 11119 }, { "epoch": 0.7828229496656107, "grad_norm": 2.0066471099853516, "learning_rate": 1.6615889640953131e-06, "loss": 0.6226, "step": 11120 }, { "epoch": 0.7828933474128827, "grad_norm": 1.5381684303283691, "learning_rate": 1.6605566859697237e-06, "loss": 0.595, "step": 11121 }, { "epoch": 0.7829637451601549, "grad_norm": 1.6907211542129517, "learning_rate": 1.6595246854466244e-06, "loss": 0.7387, "step": 11122 }, { "epoch": 0.783034142907427, "grad_norm": 1.8660727739334106, "learning_rate": 1.6584929625796752e-06, "loss": 0.6801, "step": 11123 }, { "epoch": 0.783104540654699, "grad_norm": 1.6023956537246704, "learning_rate": 1.657461517422512e-06, "loss": 0.5748, "step": 11124 }, { "epoch": 0.7831749384019712, "grad_norm": 1.8010411262512207, "learning_rate": 1.6564303500287654e-06, "loss": 0.6613, "step": 11125 }, { "epoch": 0.7832453361492432, "grad_norm": 2.1091854572296143, "learning_rate": 1.6553994604520418e-06, "loss": 0.778, "step": 11126 }, { "epoch": 0.7833157338965153, "grad_norm": 1.934720516204834, "learning_rate": 1.6543688487459451e-06, "loss": 0.6429, "step": 11127 }, { "epoch": 0.7833861316437875, "grad_norm": 1.7607797384262085, "learning_rate": 1.6533385149640487e-06, "loss": 0.6062, "step": 11128 }, { "epoch": 0.7834565293910595, "grad_norm": 1.7739607095718384, "learning_rate": 1.6523084591599284e-06, "loss": 0.6833, "step": 11129 }, { "epoch": 0.7835269271383316, "grad_norm": 1.849339485168457, "learning_rate": 1.6512786813871316e-06, "loss": 0.5876, "step": 11130 }, { "epoch": 0.7835973248856036, "grad_norm": 1.942318081855774, "learning_rate": 1.6502491816992019e-06, "loss": 0.7047, "step": 11131 }, { "epoch": 0.7836677226328758, "grad_norm": 2.3940305709838867, "learning_rate": 1.6492199601496606e-06, "loss": 0.7107, "step": 11132 }, { "epoch": 0.7837381203801478, "grad_norm": 2.1189002990722656, "learning_rate": 1.6481910167920236e-06, "loss": 0.7349, "step": 11133 }, { "epoch": 0.7838085181274199, "grad_norm": 1.8335667848587036, "learning_rate": 1.6471623516797784e-06, "loss": 0.6582, "step": 11134 }, { "epoch": 0.783878915874692, "grad_norm": 1.8057727813720703, "learning_rate": 1.6461339648664124e-06, "loss": 0.6967, "step": 11135 }, { "epoch": 0.7839493136219641, "grad_norm": 1.8569633960723877, "learning_rate": 1.645105856405388e-06, "loss": 0.6796, "step": 11136 }, { "epoch": 0.7840197113692362, "grad_norm": 2.462355136871338, "learning_rate": 1.6440780263501623e-06, "loss": 0.5868, "step": 11137 }, { "epoch": 0.7840901091165082, "grad_norm": 1.9301851987838745, "learning_rate": 1.6430504747541684e-06, "loss": 0.721, "step": 11138 }, { "epoch": 0.7841605068637804, "grad_norm": 2.873478412628174, "learning_rate": 1.6420232016708343e-06, "loss": 0.6047, "step": 11139 }, { "epoch": 0.7842309046110525, "grad_norm": 1.969352126121521, "learning_rate": 1.6409962071535664e-06, "loss": 0.7516, "step": 11140 }, { "epoch": 0.7843013023583245, "grad_norm": 1.8931164741516113, "learning_rate": 1.6399694912557565e-06, "loss": 0.6039, "step": 11141 }, { "epoch": 0.7843717001055966, "grad_norm": 1.8865864276885986, "learning_rate": 1.638943054030789e-06, "loss": 0.614, "step": 11142 }, { "epoch": 0.7844420978528687, "grad_norm": 2.31870174407959, "learning_rate": 1.6379168955320254e-06, "loss": 0.7487, "step": 11143 }, { "epoch": 0.7845124956001408, "grad_norm": 2.31754207611084, "learning_rate": 1.6368910158128211e-06, "loss": 0.694, "step": 11144 }, { "epoch": 0.7845828933474129, "grad_norm": 1.7842503786087036, "learning_rate": 1.63586541492651e-06, "loss": 0.7731, "step": 11145 }, { "epoch": 0.784653291094685, "grad_norm": 1.6091986894607544, "learning_rate": 1.6348400929264137e-06, "loss": 0.784, "step": 11146 }, { "epoch": 0.7847236888419571, "grad_norm": 1.9335802793502808, "learning_rate": 1.6338150498658377e-06, "loss": 0.6536, "step": 11147 }, { "epoch": 0.7847940865892291, "grad_norm": 1.9815512895584106, "learning_rate": 1.6327902857980798e-06, "loss": 0.5696, "step": 11148 }, { "epoch": 0.7848644843365012, "grad_norm": 1.680778980255127, "learning_rate": 1.6317658007764133e-06, "loss": 0.777, "step": 11149 }, { "epoch": 0.7849348820837733, "grad_norm": 1.7695398330688477, "learning_rate": 1.6307415948541074e-06, "loss": 0.6466, "step": 11150 }, { "epoch": 0.7850052798310454, "grad_norm": 2.084780693054199, "learning_rate": 1.6297176680844079e-06, "loss": 0.6528, "step": 11151 }, { "epoch": 0.7850756775783175, "grad_norm": 1.8378831148147583, "learning_rate": 1.6286940205205505e-06, "loss": 0.6889, "step": 11152 }, { "epoch": 0.7851460753255896, "grad_norm": 2.729276657104492, "learning_rate": 1.6276706522157532e-06, "loss": 0.654, "step": 11153 }, { "epoch": 0.7852164730728617, "grad_norm": 1.905706763267517, "learning_rate": 1.6266475632232267e-06, "loss": 0.7795, "step": 11154 }, { "epoch": 0.7852868708201337, "grad_norm": 1.9225305318832397, "learning_rate": 1.6256247535961577e-06, "loss": 0.7133, "step": 11155 }, { "epoch": 0.7853572685674058, "grad_norm": 1.7164913415908813, "learning_rate": 1.6246022233877268e-06, "loss": 0.7091, "step": 11156 }, { "epoch": 0.785427666314678, "grad_norm": 2.1878409385681152, "learning_rate": 1.6235799726510947e-06, "loss": 0.7435, "step": 11157 }, { "epoch": 0.78549806406195, "grad_norm": 1.8221943378448486, "learning_rate": 1.6225580014394093e-06, "loss": 0.5633, "step": 11158 }, { "epoch": 0.7855684618092221, "grad_norm": 1.9369239807128906, "learning_rate": 1.621536309805801e-06, "loss": 0.6474, "step": 11159 }, { "epoch": 0.7856388595564942, "grad_norm": 1.7878022193908691, "learning_rate": 1.6205148978033933e-06, "loss": 0.5737, "step": 11160 }, { "epoch": 0.7857092573037663, "grad_norm": 1.951951265335083, "learning_rate": 1.6194937654852858e-06, "loss": 0.5893, "step": 11161 }, { "epoch": 0.7857796550510384, "grad_norm": 1.878322958946228, "learning_rate": 1.6184729129045726e-06, "loss": 0.5572, "step": 11162 }, { "epoch": 0.7858500527983104, "grad_norm": 1.925378441810608, "learning_rate": 1.6174523401143262e-06, "loss": 0.78, "step": 11163 }, { "epoch": 0.7859204505455826, "grad_norm": 4.010754585266113, "learning_rate": 1.616432047167608e-06, "loss": 0.6226, "step": 11164 }, { "epoch": 0.7859908482928546, "grad_norm": 1.9657721519470215, "learning_rate": 1.6154120341174609e-06, "loss": 0.6455, "step": 11165 }, { "epoch": 0.7860612460401267, "grad_norm": 1.8669992685317993, "learning_rate": 1.6143923010169204e-06, "loss": 0.5702, "step": 11166 }, { "epoch": 0.7861316437873987, "grad_norm": 2.1184823513031006, "learning_rate": 1.6133728479189998e-06, "loss": 0.5493, "step": 11167 }, { "epoch": 0.7862020415346709, "grad_norm": 1.8830924034118652, "learning_rate": 1.6123536748767052e-06, "loss": 0.5485, "step": 11168 }, { "epoch": 0.786272439281943, "grad_norm": 2.405334711074829, "learning_rate": 1.611334781943023e-06, "loss": 0.7894, "step": 11169 }, { "epoch": 0.786342837029215, "grad_norm": 1.8574564456939697, "learning_rate": 1.6103161691709253e-06, "loss": 0.6765, "step": 11170 }, { "epoch": 0.7864132347764872, "grad_norm": 2.137089252471924, "learning_rate": 1.6092978366133691e-06, "loss": 0.6642, "step": 11171 }, { "epoch": 0.7864836325237592, "grad_norm": 1.9004735946655273, "learning_rate": 1.6082797843233024e-06, "loss": 0.6689, "step": 11172 }, { "epoch": 0.7865540302710313, "grad_norm": 1.9096980094909668, "learning_rate": 1.6072620123536505e-06, "loss": 0.7186, "step": 11173 }, { "epoch": 0.7866244280183035, "grad_norm": 2.1087706089019775, "learning_rate": 1.6062445207573327e-06, "loss": 0.6067, "step": 11174 }, { "epoch": 0.7866948257655755, "grad_norm": 1.8040441274642944, "learning_rate": 1.6052273095872475e-06, "loss": 0.7849, "step": 11175 }, { "epoch": 0.7867652235128476, "grad_norm": 2.219050645828247, "learning_rate": 1.6042103788962805e-06, "loss": 0.6467, "step": 11176 }, { "epoch": 0.7868356212601196, "grad_norm": 1.853709101676941, "learning_rate": 1.6031937287373001e-06, "loss": 0.62, "step": 11177 }, { "epoch": 0.7869060190073918, "grad_norm": 1.7779865264892578, "learning_rate": 1.6021773591631676e-06, "loss": 0.5459, "step": 11178 }, { "epoch": 0.7869764167546639, "grad_norm": 2.341951370239258, "learning_rate": 1.6011612702267204e-06, "loss": 0.7022, "step": 11179 }, { "epoch": 0.7870468145019359, "grad_norm": 2.1454789638519287, "learning_rate": 1.6001454619807907e-06, "loss": 0.6606, "step": 11180 }, { "epoch": 0.787117212249208, "grad_norm": 1.775092363357544, "learning_rate": 1.5991299344781885e-06, "loss": 0.6168, "step": 11181 }, { "epoch": 0.7871876099964801, "grad_norm": 1.7805384397506714, "learning_rate": 1.5981146877717125e-06, "loss": 0.6042, "step": 11182 }, { "epoch": 0.7872580077437522, "grad_norm": 1.7752861976623535, "learning_rate": 1.597099721914144e-06, "loss": 0.6167, "step": 11183 }, { "epoch": 0.7873284054910243, "grad_norm": 1.7175688743591309, "learning_rate": 1.5960850369582562e-06, "loss": 0.7495, "step": 11184 }, { "epoch": 0.7873988032382964, "grad_norm": 2.266176462173462, "learning_rate": 1.595070632956799e-06, "loss": 0.5449, "step": 11185 }, { "epoch": 0.7874692009855685, "grad_norm": 2.1978259086608887, "learning_rate": 1.5940565099625174e-06, "loss": 0.6137, "step": 11186 }, { "epoch": 0.7875395987328405, "grad_norm": 2.310164451599121, "learning_rate": 1.593042668028133e-06, "loss": 0.6715, "step": 11187 }, { "epoch": 0.7876099964801127, "grad_norm": 1.6877752542495728, "learning_rate": 1.5920291072063552e-06, "loss": 0.6306, "step": 11188 }, { "epoch": 0.7876803942273847, "grad_norm": 1.7617920637130737, "learning_rate": 1.591015827549884e-06, "loss": 0.5065, "step": 11189 }, { "epoch": 0.7877507919746568, "grad_norm": 1.7321642637252808, "learning_rate": 1.5900028291113967e-06, "loss": 0.7049, "step": 11190 }, { "epoch": 0.7878211897219289, "grad_norm": 1.9638044834136963, "learning_rate": 1.5889901119435642e-06, "loss": 0.6112, "step": 11191 }, { "epoch": 0.787891587469201, "grad_norm": 1.9644232988357544, "learning_rate": 1.5879776760990355e-06, "loss": 0.7448, "step": 11192 }, { "epoch": 0.7879619852164731, "grad_norm": 2.303680658340454, "learning_rate": 1.58696552163045e-06, "loss": 0.6362, "step": 11193 }, { "epoch": 0.7880323829637451, "grad_norm": 1.8292583227157593, "learning_rate": 1.5859536485904266e-06, "loss": 0.6857, "step": 11194 }, { "epoch": 0.7881027807110172, "grad_norm": 2.1460120677948, "learning_rate": 1.5849420570315777e-06, "loss": 0.694, "step": 11195 }, { "epoch": 0.7881731784582894, "grad_norm": 2.4236605167388916, "learning_rate": 1.5839307470064947e-06, "loss": 0.6758, "step": 11196 }, { "epoch": 0.7882435762055614, "grad_norm": 2.088330030441284, "learning_rate": 1.582919718567758e-06, "loss": 0.6134, "step": 11197 }, { "epoch": 0.7883139739528335, "grad_norm": 2.225099802017212, "learning_rate": 1.5819089717679322e-06, "loss": 0.5889, "step": 11198 }, { "epoch": 0.7883843717001056, "grad_norm": 1.4640287160873413, "learning_rate": 1.580898506659565e-06, "loss": 0.5249, "step": 11199 }, { "epoch": 0.7884547694473777, "grad_norm": 1.806365966796875, "learning_rate": 1.579888323295191e-06, "loss": 0.5889, "step": 11200 }, { "epoch": 0.7885251671946498, "grad_norm": 1.8128294944763184, "learning_rate": 1.5788784217273336e-06, "loss": 0.6766, "step": 11201 }, { "epoch": 0.7885955649419218, "grad_norm": 3.2374463081359863, "learning_rate": 1.5778688020084946e-06, "loss": 0.5881, "step": 11202 }, { "epoch": 0.788665962689194, "grad_norm": 1.7775721549987793, "learning_rate": 1.576859464191169e-06, "loss": 0.6303, "step": 11203 }, { "epoch": 0.788736360436466, "grad_norm": 2.231595516204834, "learning_rate": 1.5758504083278315e-06, "loss": 0.6408, "step": 11204 }, { "epoch": 0.7888067581837381, "grad_norm": 1.8019248247146606, "learning_rate": 1.574841634470943e-06, "loss": 0.5297, "step": 11205 }, { "epoch": 0.7888771559310102, "grad_norm": 1.980423927307129, "learning_rate": 1.5738331426729501e-06, "loss": 0.5498, "step": 11206 }, { "epoch": 0.7889475536782823, "grad_norm": 2.1128151416778564, "learning_rate": 1.572824932986288e-06, "loss": 0.7201, "step": 11207 }, { "epoch": 0.7890179514255544, "grad_norm": 1.5770304203033447, "learning_rate": 1.5718170054633714e-06, "loss": 0.6093, "step": 11208 }, { "epoch": 0.7890883491728264, "grad_norm": 1.906086802482605, "learning_rate": 1.5708093601566064e-06, "loss": 0.6047, "step": 11209 }, { "epoch": 0.7891587469200986, "grad_norm": 1.949823021888733, "learning_rate": 1.5698019971183791e-06, "loss": 0.7331, "step": 11210 }, { "epoch": 0.7892291446673706, "grad_norm": 1.903473973274231, "learning_rate": 1.568794916401064e-06, "loss": 0.5802, "step": 11211 }, { "epoch": 0.7892995424146427, "grad_norm": 2.362618923187256, "learning_rate": 1.5677881180570182e-06, "loss": 0.7005, "step": 11212 }, { "epoch": 0.7893699401619149, "grad_norm": 1.962419867515564, "learning_rate": 1.5667816021385906e-06, "loss": 0.6699, "step": 11213 }, { "epoch": 0.7894403379091869, "grad_norm": 1.8860288858413696, "learning_rate": 1.565775368698105e-06, "loss": 0.6602, "step": 11214 }, { "epoch": 0.789510735656459, "grad_norm": 1.9362924098968506, "learning_rate": 1.5647694177878825e-06, "loss": 0.6854, "step": 11215 }, { "epoch": 0.789581133403731, "grad_norm": 1.766741394996643, "learning_rate": 1.5637637494602195e-06, "loss": 0.6157, "step": 11216 }, { "epoch": 0.7896515311510032, "grad_norm": 1.8660367727279663, "learning_rate": 1.5627583637674025e-06, "loss": 0.6443, "step": 11217 }, { "epoch": 0.7897219288982753, "grad_norm": 2.163687229156494, "learning_rate": 1.5617532607617012e-06, "loss": 0.6925, "step": 11218 }, { "epoch": 0.7897923266455473, "grad_norm": 1.6547349691390991, "learning_rate": 1.560748440495375e-06, "loss": 0.6852, "step": 11219 }, { "epoch": 0.7898627243928195, "grad_norm": 1.909751534461975, "learning_rate": 1.5597439030206608e-06, "loss": 0.6817, "step": 11220 }, { "epoch": 0.7899331221400915, "grad_norm": 1.9884084463119507, "learning_rate": 1.5587396483897902e-06, "loss": 0.6798, "step": 11221 }, { "epoch": 0.7900035198873636, "grad_norm": 2.1216928958892822, "learning_rate": 1.5577356766549737e-06, "loss": 0.631, "step": 11222 }, { "epoch": 0.7900739176346356, "grad_norm": 1.9194802045822144, "learning_rate": 1.5567319878684076e-06, "loss": 0.6399, "step": 11223 }, { "epoch": 0.7901443153819078, "grad_norm": 1.579756498336792, "learning_rate": 1.5557285820822733e-06, "loss": 0.6521, "step": 11224 }, { "epoch": 0.7902147131291799, "grad_norm": 1.89894437789917, "learning_rate": 1.5547254593487428e-06, "loss": 0.6697, "step": 11225 }, { "epoch": 0.7902851108764519, "grad_norm": 1.7662122249603271, "learning_rate": 1.5537226197199647e-06, "loss": 0.6534, "step": 11226 }, { "epoch": 0.7903555086237241, "grad_norm": 2.0041792392730713, "learning_rate": 1.5527200632480824e-06, "loss": 0.7241, "step": 11227 }, { "epoch": 0.7904259063709961, "grad_norm": 1.8210434913635254, "learning_rate": 1.5517177899852162e-06, "loss": 0.6284, "step": 11228 }, { "epoch": 0.7904963041182682, "grad_norm": 2.1379621028900146, "learning_rate": 1.5507157999834792e-06, "loss": 0.627, "step": 11229 }, { "epoch": 0.7905667018655403, "grad_norm": 2.2805116176605225, "learning_rate": 1.5497140932949593e-06, "loss": 0.6891, "step": 11230 }, { "epoch": 0.7906370996128124, "grad_norm": 2.0883426666259766, "learning_rate": 1.548712669971742e-06, "loss": 0.6181, "step": 11231 }, { "epoch": 0.7907074973600845, "grad_norm": 2.0271170139312744, "learning_rate": 1.5477115300658876e-06, "loss": 0.543, "step": 11232 }, { "epoch": 0.7907778951073565, "grad_norm": 5.069008827209473, "learning_rate": 1.5467106736294505e-06, "loss": 0.5683, "step": 11233 }, { "epoch": 0.7908482928546287, "grad_norm": 2.011101484298706, "learning_rate": 1.5457101007144624e-06, "loss": 0.6261, "step": 11234 }, { "epoch": 0.7909186906019008, "grad_norm": 1.8853598833084106, "learning_rate": 1.54470981137295e-06, "loss": 0.5945, "step": 11235 }, { "epoch": 0.7909890883491728, "grad_norm": 1.9928348064422607, "learning_rate": 1.5437098056569118e-06, "loss": 0.6555, "step": 11236 }, { "epoch": 0.7910594860964449, "grad_norm": 1.9339262247085571, "learning_rate": 1.5427100836183435e-06, "loss": 0.5948, "step": 11237 }, { "epoch": 0.791129883843717, "grad_norm": 1.764718770980835, "learning_rate": 1.5417106453092182e-06, "loss": 0.5502, "step": 11238 }, { "epoch": 0.7912002815909891, "grad_norm": 2.087714195251465, "learning_rate": 1.5407114907815e-06, "loss": 0.6503, "step": 11239 }, { "epoch": 0.7912706793382612, "grad_norm": 1.670902132987976, "learning_rate": 1.5397126200871373e-06, "loss": 0.636, "step": 11240 }, { "epoch": 0.7913410770855333, "grad_norm": 2.019625425338745, "learning_rate": 1.538714033278061e-06, "loss": 0.5729, "step": 11241 }, { "epoch": 0.7914114748328054, "grad_norm": 1.8858132362365723, "learning_rate": 1.5377157304061887e-06, "loss": 0.6308, "step": 11242 }, { "epoch": 0.7914818725800774, "grad_norm": 2.177860975265503, "learning_rate": 1.5367177115234198e-06, "loss": 0.5813, "step": 11243 }, { "epoch": 0.7915522703273495, "grad_norm": 2.2972328662872314, "learning_rate": 1.5357199766816472e-06, "loss": 0.6665, "step": 11244 }, { "epoch": 0.7916226680746216, "grad_norm": 2.021249532699585, "learning_rate": 1.534722525932739e-06, "loss": 0.6103, "step": 11245 }, { "epoch": 0.7916930658218937, "grad_norm": 1.8931219577789307, "learning_rate": 1.5337253593285592e-06, "loss": 0.5773, "step": 11246 }, { "epoch": 0.7917634635691658, "grad_norm": 1.7485551834106445, "learning_rate": 1.5327284769209483e-06, "loss": 0.6855, "step": 11247 }, { "epoch": 0.7918338613164378, "grad_norm": 1.9976626634597778, "learning_rate": 1.5317318787617351e-06, "loss": 0.6118, "step": 11248 }, { "epoch": 0.79190425906371, "grad_norm": 1.8768340349197388, "learning_rate": 1.5307355649027324e-06, "loss": 0.6205, "step": 11249 }, { "epoch": 0.791974656810982, "grad_norm": 1.7207293510437012, "learning_rate": 1.5297395353957424e-06, "loss": 0.6772, "step": 11250 }, { "epoch": 0.7920450545582541, "grad_norm": 1.7383793592453003, "learning_rate": 1.5287437902925464e-06, "loss": 0.5527, "step": 11251 }, { "epoch": 0.7921154523055263, "grad_norm": 1.839133858680725, "learning_rate": 1.5277483296449174e-06, "loss": 0.6094, "step": 11252 }, { "epoch": 0.7921858500527983, "grad_norm": 3.4630885124206543, "learning_rate": 1.526753153504609e-06, "loss": 0.683, "step": 11253 }, { "epoch": 0.7922562478000704, "grad_norm": 1.806921124458313, "learning_rate": 1.5257582619233614e-06, "loss": 0.658, "step": 11254 }, { "epoch": 0.7923266455473424, "grad_norm": 1.8149384260177612, "learning_rate": 1.5247636549528971e-06, "loss": 0.5469, "step": 11255 }, { "epoch": 0.7923970432946146, "grad_norm": 1.783969521522522, "learning_rate": 1.5237693326449312e-06, "loss": 0.6515, "step": 11256 }, { "epoch": 0.7924674410418867, "grad_norm": 1.6275043487548828, "learning_rate": 1.5227752950511552e-06, "loss": 0.5798, "step": 11257 }, { "epoch": 0.7925378387891587, "grad_norm": 1.8110941648483276, "learning_rate": 1.521781542223254e-06, "loss": 0.5755, "step": 11258 }, { "epoch": 0.7926082365364309, "grad_norm": 2.0637526512145996, "learning_rate": 1.520788074212892e-06, "loss": 0.5482, "step": 11259 }, { "epoch": 0.7926786342837029, "grad_norm": 1.5671215057373047, "learning_rate": 1.51979489107172e-06, "loss": 0.6745, "step": 11260 }, { "epoch": 0.792749032030975, "grad_norm": 2.1400701999664307, "learning_rate": 1.518801992851373e-06, "loss": 0.6605, "step": 11261 }, { "epoch": 0.792819429778247, "grad_norm": 1.8211106061935425, "learning_rate": 1.5178093796034764e-06, "loss": 0.5649, "step": 11262 }, { "epoch": 0.7928898275255192, "grad_norm": 1.869382619857788, "learning_rate": 1.516817051379633e-06, "loss": 0.741, "step": 11263 }, { "epoch": 0.7929602252727913, "grad_norm": 1.8937757015228271, "learning_rate": 1.5158250082314387e-06, "loss": 0.7003, "step": 11264 }, { "epoch": 0.7930306230200633, "grad_norm": 1.9148468971252441, "learning_rate": 1.5148332502104697e-06, "loss": 0.703, "step": 11265 }, { "epoch": 0.7931010207673355, "grad_norm": 2.082277536392212, "learning_rate": 1.513841777368287e-06, "loss": 0.6188, "step": 11266 }, { "epoch": 0.7931714185146075, "grad_norm": 1.8408116102218628, "learning_rate": 1.512850589756437e-06, "loss": 0.574, "step": 11267 }, { "epoch": 0.7932418162618796, "grad_norm": 2.1696865558624268, "learning_rate": 1.511859687426457e-06, "loss": 0.6132, "step": 11268 }, { "epoch": 0.7933122140091518, "grad_norm": 1.9473010301589966, "learning_rate": 1.510869070429859e-06, "loss": 0.6288, "step": 11269 }, { "epoch": 0.7933826117564238, "grad_norm": 1.9700613021850586, "learning_rate": 1.5098787388181526e-06, "loss": 0.622, "step": 11270 }, { "epoch": 0.7934530095036959, "grad_norm": 1.846717357635498, "learning_rate": 1.5088886926428215e-06, "loss": 0.5384, "step": 11271 }, { "epoch": 0.7935234072509679, "grad_norm": 2.233133316040039, "learning_rate": 1.507898931955341e-06, "loss": 0.6516, "step": 11272 }, { "epoch": 0.7935938049982401, "grad_norm": 2.0047366619110107, "learning_rate": 1.506909456807167e-06, "loss": 0.6135, "step": 11273 }, { "epoch": 0.7936642027455122, "grad_norm": 2.2033889293670654, "learning_rate": 1.505920267249747e-06, "loss": 0.6758, "step": 11274 }, { "epoch": 0.7937346004927842, "grad_norm": 1.5893237590789795, "learning_rate": 1.5049313633345066e-06, "loss": 0.5681, "step": 11275 }, { "epoch": 0.7938049982400563, "grad_norm": 1.7532336711883545, "learning_rate": 1.5039427451128625e-06, "loss": 0.6724, "step": 11276 }, { "epoch": 0.7938753959873284, "grad_norm": 1.8150497674942017, "learning_rate": 1.5029544126362127e-06, "loss": 0.6727, "step": 11277 }, { "epoch": 0.7939457937346005, "grad_norm": 2.1115150451660156, "learning_rate": 1.5019663659559413e-06, "loss": 0.6579, "step": 11278 }, { "epoch": 0.7940161914818726, "grad_norm": 2.001028060913086, "learning_rate": 1.500978605123416e-06, "loss": 0.6479, "step": 11279 }, { "epoch": 0.7940865892291447, "grad_norm": 1.964374303817749, "learning_rate": 1.4999911301899957e-06, "loss": 0.5868, "step": 11280 }, { "epoch": 0.7941569869764168, "grad_norm": 1.8014540672302246, "learning_rate": 1.499003941207015e-06, "loss": 0.7072, "step": 11281 }, { "epoch": 0.7942273847236888, "grad_norm": 2.18180251121521, "learning_rate": 1.498017038225804e-06, "loss": 0.7253, "step": 11282 }, { "epoch": 0.794297782470961, "grad_norm": 2.2905514240264893, "learning_rate": 1.4970304212976691e-06, "loss": 0.7653, "step": 11283 }, { "epoch": 0.794368180218233, "grad_norm": 2.056652545928955, "learning_rate": 1.4960440904739073e-06, "loss": 0.67, "step": 11284 }, { "epoch": 0.7944385779655051, "grad_norm": 2.067701578140259, "learning_rate": 1.4950580458057954e-06, "loss": 0.5904, "step": 11285 }, { "epoch": 0.7945089757127772, "grad_norm": 2.0860769748687744, "learning_rate": 1.4940722873446039e-06, "loss": 0.6808, "step": 11286 }, { "epoch": 0.7945793734600493, "grad_norm": 1.8879669904708862, "learning_rate": 1.4930868151415776e-06, "loss": 0.6585, "step": 11287 }, { "epoch": 0.7946497712073214, "grad_norm": 1.8322242498397827, "learning_rate": 1.4921016292479576e-06, "loss": 0.6423, "step": 11288 }, { "epoch": 0.7947201689545934, "grad_norm": 2.2098991870880127, "learning_rate": 1.4911167297149625e-06, "loss": 0.6443, "step": 11289 }, { "epoch": 0.7947905667018655, "grad_norm": 1.9363467693328857, "learning_rate": 1.4901321165937959e-06, "loss": 0.6227, "step": 11290 }, { "epoch": 0.7948609644491377, "grad_norm": 2.6937050819396973, "learning_rate": 1.489147789935652e-06, "loss": 0.7298, "step": 11291 }, { "epoch": 0.7949313621964097, "grad_norm": 3.3898191452026367, "learning_rate": 1.4881637497917036e-06, "loss": 0.6581, "step": 11292 }, { "epoch": 0.7950017599436818, "grad_norm": 1.8404685258865356, "learning_rate": 1.4871799962131156e-06, "loss": 0.6658, "step": 11293 }, { "epoch": 0.7950721576909539, "grad_norm": 1.7160937786102295, "learning_rate": 1.4861965292510325e-06, "loss": 0.7059, "step": 11294 }, { "epoch": 0.795142555438226, "grad_norm": 1.6603354215621948, "learning_rate": 1.4852133489565858e-06, "loss": 0.6896, "step": 11295 }, { "epoch": 0.7952129531854981, "grad_norm": 1.7605094909667969, "learning_rate": 1.484230455380889e-06, "loss": 0.7083, "step": 11296 }, { "epoch": 0.7952833509327701, "grad_norm": 1.9338182210922241, "learning_rate": 1.483247848575048e-06, "loss": 0.7174, "step": 11297 }, { "epoch": 0.7953537486800423, "grad_norm": 1.693452000617981, "learning_rate": 1.4822655285901461e-06, "loss": 0.6671, "step": 11298 }, { "epoch": 0.7954241464273143, "grad_norm": 2.4138917922973633, "learning_rate": 1.4812834954772589e-06, "loss": 0.7405, "step": 11299 }, { "epoch": 0.7954945441745864, "grad_norm": 2.0573744773864746, "learning_rate": 1.4803017492874395e-06, "loss": 0.6611, "step": 11300 }, { "epoch": 0.7955649419218584, "grad_norm": 1.7144583463668823, "learning_rate": 1.479320290071732e-06, "loss": 0.5746, "step": 11301 }, { "epoch": 0.7956353396691306, "grad_norm": 1.9715425968170166, "learning_rate": 1.4783391178811606e-06, "loss": 0.6052, "step": 11302 }, { "epoch": 0.7957057374164027, "grad_norm": 2.55731463432312, "learning_rate": 1.4773582327667407e-06, "loss": 0.6859, "step": 11303 }, { "epoch": 0.7957761351636747, "grad_norm": 2.941145658493042, "learning_rate": 1.4763776347794663e-06, "loss": 0.7211, "step": 11304 }, { "epoch": 0.7958465329109469, "grad_norm": 2.024477005004883, "learning_rate": 1.4753973239703228e-06, "loss": 0.6032, "step": 11305 }, { "epoch": 0.7959169306582189, "grad_norm": 1.8886548280715942, "learning_rate": 1.474417300390276e-06, "loss": 0.6182, "step": 11306 }, { "epoch": 0.795987328405491, "grad_norm": 2.3313779830932617, "learning_rate": 1.4734375640902779e-06, "loss": 0.6112, "step": 11307 }, { "epoch": 0.7960577261527632, "grad_norm": 1.6120902299880981, "learning_rate": 1.472458115121264e-06, "loss": 0.7178, "step": 11308 }, { "epoch": 0.7961281239000352, "grad_norm": 1.7141472101211548, "learning_rate": 1.4714789535341606e-06, "loss": 0.5853, "step": 11309 }, { "epoch": 0.7961985216473073, "grad_norm": 2.1929807662963867, "learning_rate": 1.4705000793798713e-06, "loss": 0.7112, "step": 11310 }, { "epoch": 0.7962689193945793, "grad_norm": 2.6587483882904053, "learning_rate": 1.469521492709293e-06, "loss": 0.6662, "step": 11311 }, { "epoch": 0.7963393171418515, "grad_norm": 1.86634361743927, "learning_rate": 1.4685431935733008e-06, "loss": 0.6219, "step": 11312 }, { "epoch": 0.7964097148891236, "grad_norm": 1.9872443675994873, "learning_rate": 1.4675651820227577e-06, "loss": 0.6355, "step": 11313 }, { "epoch": 0.7964801126363956, "grad_norm": 2.2503440380096436, "learning_rate": 1.4665874581085093e-06, "loss": 0.67, "step": 11314 }, { "epoch": 0.7965505103836678, "grad_norm": 2.2943084239959717, "learning_rate": 1.4656100218813922e-06, "loss": 0.6219, "step": 11315 }, { "epoch": 0.7966209081309398, "grad_norm": 1.858384609222412, "learning_rate": 1.4646328733922206e-06, "loss": 0.7125, "step": 11316 }, { "epoch": 0.7966913058782119, "grad_norm": 1.856398582458496, "learning_rate": 1.4636560126918006e-06, "loss": 0.688, "step": 11317 }, { "epoch": 0.7967617036254839, "grad_norm": 1.9442437887191772, "learning_rate": 1.4626794398309186e-06, "loss": 0.7171, "step": 11318 }, { "epoch": 0.7968321013727561, "grad_norm": 2.2677114009857178, "learning_rate": 1.4617031548603472e-06, "loss": 0.7344, "step": 11319 }, { "epoch": 0.7969024991200282, "grad_norm": 1.9007998704910278, "learning_rate": 1.460727157830843e-06, "loss": 0.6621, "step": 11320 }, { "epoch": 0.7969728968673002, "grad_norm": 1.7070995569229126, "learning_rate": 1.4597514487931522e-06, "loss": 0.6978, "step": 11321 }, { "epoch": 0.7970432946145724, "grad_norm": 1.9699820280075073, "learning_rate": 1.4587760277979996e-06, "loss": 0.6516, "step": 11322 }, { "epoch": 0.7971136923618444, "grad_norm": 2.1600401401519775, "learning_rate": 1.457800894896101e-06, "loss": 0.711, "step": 11323 }, { "epoch": 0.7971840901091165, "grad_norm": 2.270263195037842, "learning_rate": 1.456826050138154e-06, "loss": 0.6221, "step": 11324 }, { "epoch": 0.7972544878563886, "grad_norm": 1.6153262853622437, "learning_rate": 1.4558514935748402e-06, "loss": 0.6801, "step": 11325 }, { "epoch": 0.7973248856036607, "grad_norm": 2.5926828384399414, "learning_rate": 1.4548772252568262e-06, "loss": 0.7235, "step": 11326 }, { "epoch": 0.7973952833509328, "grad_norm": 1.8965530395507812, "learning_rate": 1.4539032452347702e-06, "loss": 0.5666, "step": 11327 }, { "epoch": 0.7974656810982048, "grad_norm": 2.019597291946411, "learning_rate": 1.4529295535593048e-06, "loss": 0.7345, "step": 11328 }, { "epoch": 0.797536078845477, "grad_norm": 2.17825984954834, "learning_rate": 1.451956150281057e-06, "loss": 0.6205, "step": 11329 }, { "epoch": 0.7976064765927491, "grad_norm": 1.9837154150009155, "learning_rate": 1.4509830354506342e-06, "loss": 0.6207, "step": 11330 }, { "epoch": 0.7976768743400211, "grad_norm": 1.8884116411209106, "learning_rate": 1.4500102091186288e-06, "loss": 0.6254, "step": 11331 }, { "epoch": 0.7977472720872932, "grad_norm": 1.7636656761169434, "learning_rate": 1.449037671335617e-06, "loss": 0.6887, "step": 11332 }, { "epoch": 0.7978176698345653, "grad_norm": 1.9112602472305298, "learning_rate": 1.4480654221521657e-06, "loss": 0.7742, "step": 11333 }, { "epoch": 0.7978880675818374, "grad_norm": 1.7022463083267212, "learning_rate": 1.4470934616188192e-06, "loss": 0.6427, "step": 11334 }, { "epoch": 0.7979584653291095, "grad_norm": 1.7339926958084106, "learning_rate": 1.4461217897861154e-06, "loss": 0.6755, "step": 11335 }, { "epoch": 0.7980288630763815, "grad_norm": 1.850535273551941, "learning_rate": 1.445150406704567e-06, "loss": 0.6828, "step": 11336 }, { "epoch": 0.7980992608236537, "grad_norm": 1.9330639839172363, "learning_rate": 1.4441793124246837e-06, "loss": 0.6695, "step": 11337 }, { "epoch": 0.7981696585709257, "grad_norm": 1.6631863117218018, "learning_rate": 1.4432085069969457e-06, "loss": 0.5305, "step": 11338 }, { "epoch": 0.7982400563181978, "grad_norm": 1.7349246740341187, "learning_rate": 1.44223799047183e-06, "loss": 0.599, "step": 11339 }, { "epoch": 0.7983104540654699, "grad_norm": 1.9773101806640625, "learning_rate": 1.4412677628997968e-06, "loss": 0.6924, "step": 11340 }, { "epoch": 0.798380851812742, "grad_norm": 2.0680389404296875, "learning_rate": 1.440297824331284e-06, "loss": 0.7041, "step": 11341 }, { "epoch": 0.7984512495600141, "grad_norm": 2.40694260597229, "learning_rate": 1.4393281748167255e-06, "loss": 0.6501, "step": 11342 }, { "epoch": 0.7985216473072861, "grad_norm": 1.8158074617385864, "learning_rate": 1.4383588144065305e-06, "loss": 0.6469, "step": 11343 }, { "epoch": 0.7985920450545583, "grad_norm": 1.8681696653366089, "learning_rate": 1.4373897431510983e-06, "loss": 0.6285, "step": 11344 }, { "epoch": 0.7986624428018303, "grad_norm": 2.0735113620758057, "learning_rate": 1.4364209611008084e-06, "loss": 0.7115, "step": 11345 }, { "epoch": 0.7987328405491024, "grad_norm": 1.7642958164215088, "learning_rate": 1.435452468306034e-06, "loss": 0.5954, "step": 11346 }, { "epoch": 0.7988032382963746, "grad_norm": 2.7819740772247314, "learning_rate": 1.4344842648171237e-06, "loss": 0.6064, "step": 11347 }, { "epoch": 0.7988736360436466, "grad_norm": 2.157402515411377, "learning_rate": 1.433516350684418e-06, "loss": 0.6896, "step": 11348 }, { "epoch": 0.7989440337909187, "grad_norm": 1.5065827369689941, "learning_rate": 1.432548725958239e-06, "loss": 0.5946, "step": 11349 }, { "epoch": 0.7990144315381907, "grad_norm": 1.9106087684631348, "learning_rate": 1.4315813906888934e-06, "loss": 0.6005, "step": 11350 }, { "epoch": 0.7990848292854629, "grad_norm": 1.9756278991699219, "learning_rate": 1.4306143449266732e-06, "loss": 0.6078, "step": 11351 }, { "epoch": 0.799155227032735, "grad_norm": 1.8290400505065918, "learning_rate": 1.429647588721859e-06, "loss": 0.5706, "step": 11352 }, { "epoch": 0.799225624780007, "grad_norm": 1.7628275156021118, "learning_rate": 1.4286811221247086e-06, "loss": 0.6824, "step": 11353 }, { "epoch": 0.7992960225272792, "grad_norm": 1.9750852584838867, "learning_rate": 1.4277149451854752e-06, "loss": 0.5995, "step": 11354 }, { "epoch": 0.7993664202745512, "grad_norm": 2.7035372257232666, "learning_rate": 1.4267490579543882e-06, "loss": 0.6676, "step": 11355 }, { "epoch": 0.7994368180218233, "grad_norm": 2.0212388038635254, "learning_rate": 1.425783460481665e-06, "loss": 0.6436, "step": 11356 }, { "epoch": 0.7995072157690953, "grad_norm": 2.0253102779388428, "learning_rate": 1.4248181528175057e-06, "loss": 0.6634, "step": 11357 }, { "epoch": 0.7995776135163675, "grad_norm": 1.765637993812561, "learning_rate": 1.4238531350121017e-06, "loss": 0.6474, "step": 11358 }, { "epoch": 0.7996480112636396, "grad_norm": 2.072031021118164, "learning_rate": 1.4228884071156225e-06, "loss": 0.677, "step": 11359 }, { "epoch": 0.7997184090109116, "grad_norm": 1.739799976348877, "learning_rate": 1.4219239691782269e-06, "loss": 0.6015, "step": 11360 }, { "epoch": 0.7997888067581838, "grad_norm": 1.9842106103897095, "learning_rate": 1.4209598212500563e-06, "loss": 0.6658, "step": 11361 }, { "epoch": 0.7998592045054558, "grad_norm": 1.7701550722122192, "learning_rate": 1.4199959633812366e-06, "loss": 0.5757, "step": 11362 }, { "epoch": 0.7999296022527279, "grad_norm": 1.9236843585968018, "learning_rate": 1.4190323956218795e-06, "loss": 0.5912, "step": 11363 }, { "epoch": 0.8, "grad_norm": 2.118994951248169, "learning_rate": 1.4180691180220839e-06, "loss": 0.5779, "step": 11364 }, { "epoch": 0.8000703977472721, "grad_norm": 2.1276960372924805, "learning_rate": 1.4171061306319276e-06, "loss": 0.5404, "step": 11365 }, { "epoch": 0.8001407954945442, "grad_norm": 2.2069308757781982, "learning_rate": 1.4161434335014827e-06, "loss": 0.6361, "step": 11366 }, { "epoch": 0.8002111932418162, "grad_norm": 2.143888235092163, "learning_rate": 1.4151810266807975e-06, "loss": 0.6441, "step": 11367 }, { "epoch": 0.8002815909890884, "grad_norm": 2.2175118923187256, "learning_rate": 1.4142189102199085e-06, "loss": 0.6236, "step": 11368 }, { "epoch": 0.8003519887363605, "grad_norm": 1.8238818645477295, "learning_rate": 1.4132570841688347e-06, "loss": 0.6647, "step": 11369 }, { "epoch": 0.8004223864836325, "grad_norm": 1.8080753087997437, "learning_rate": 1.412295548577587e-06, "loss": 0.7154, "step": 11370 }, { "epoch": 0.8004927842309046, "grad_norm": 1.720168113708496, "learning_rate": 1.4113343034961527e-06, "loss": 0.4807, "step": 11371 }, { "epoch": 0.8005631819781767, "grad_norm": 2.1643388271331787, "learning_rate": 1.410373348974511e-06, "loss": 0.668, "step": 11372 }, { "epoch": 0.8006335797254488, "grad_norm": 2.1322219371795654, "learning_rate": 1.4094126850626215e-06, "loss": 0.6701, "step": 11373 }, { "epoch": 0.8007039774727208, "grad_norm": 1.7646738290786743, "learning_rate": 1.4084523118104291e-06, "loss": 0.6101, "step": 11374 }, { "epoch": 0.800774375219993, "grad_norm": 1.930837631225586, "learning_rate": 1.4074922292678635e-06, "loss": 0.61, "step": 11375 }, { "epoch": 0.8008447729672651, "grad_norm": 1.583247423171997, "learning_rate": 1.4065324374848432e-06, "loss": 0.6192, "step": 11376 }, { "epoch": 0.8009151707145371, "grad_norm": 1.9839202165603638, "learning_rate": 1.4055729365112661e-06, "loss": 0.5203, "step": 11377 }, { "epoch": 0.8009855684618092, "grad_norm": 1.9675312042236328, "learning_rate": 1.4046137263970197e-06, "loss": 0.6194, "step": 11378 }, { "epoch": 0.8010559662090813, "grad_norm": 1.701028823852539, "learning_rate": 1.403654807191974e-06, "loss": 0.8156, "step": 11379 }, { "epoch": 0.8011263639563534, "grad_norm": 2.733729839324951, "learning_rate": 1.4026961789459823e-06, "loss": 0.5672, "step": 11380 }, { "epoch": 0.8011967617036255, "grad_norm": 1.739900827407837, "learning_rate": 1.4017378417088844e-06, "loss": 0.7126, "step": 11381 }, { "epoch": 0.8012671594508975, "grad_norm": 1.9974050521850586, "learning_rate": 1.4007797955305078e-06, "loss": 0.6567, "step": 11382 }, { "epoch": 0.8013375571981697, "grad_norm": 1.9759145975112915, "learning_rate": 1.3998220404606587e-06, "loss": 0.6065, "step": 11383 }, { "epoch": 0.8014079549454417, "grad_norm": 2.022333860397339, "learning_rate": 1.3988645765491352e-06, "loss": 0.6012, "step": 11384 }, { "epoch": 0.8014783526927138, "grad_norm": 1.8751697540283203, "learning_rate": 1.397907403845715e-06, "loss": 0.5995, "step": 11385 }, { "epoch": 0.801548750439986, "grad_norm": 1.9361951351165771, "learning_rate": 1.3969505224001627e-06, "loss": 0.6946, "step": 11386 }, { "epoch": 0.801619148187258, "grad_norm": 1.8390111923217773, "learning_rate": 1.3959939322622247e-06, "loss": 0.7618, "step": 11387 }, { "epoch": 0.8016895459345301, "grad_norm": 1.883154034614563, "learning_rate": 1.39503763348164e-06, "loss": 0.5858, "step": 11388 }, { "epoch": 0.8017599436818021, "grad_norm": 1.5163835287094116, "learning_rate": 1.394081626108123e-06, "loss": 0.6163, "step": 11389 }, { "epoch": 0.8018303414290743, "grad_norm": 1.955304741859436, "learning_rate": 1.3931259101913808e-06, "loss": 0.6804, "step": 11390 }, { "epoch": 0.8019007391763464, "grad_norm": 1.8714109659194946, "learning_rate": 1.3921704857811007e-06, "loss": 0.6727, "step": 11391 }, { "epoch": 0.8019711369236184, "grad_norm": 2.1342484951019287, "learning_rate": 1.391215352926954e-06, "loss": 0.6799, "step": 11392 }, { "epoch": 0.8020415346708906, "grad_norm": 1.720576524734497, "learning_rate": 1.3902605116786024e-06, "loss": 0.6918, "step": 11393 }, { "epoch": 0.8021119324181626, "grad_norm": 1.9192559719085693, "learning_rate": 1.3893059620856854e-06, "loss": 0.7152, "step": 11394 }, { "epoch": 0.8021823301654347, "grad_norm": 2.001103401184082, "learning_rate": 1.388351704197835e-06, "loss": 0.6401, "step": 11395 }, { "epoch": 0.8022527279127067, "grad_norm": 2.000474452972412, "learning_rate": 1.3873977380646613e-06, "loss": 0.6468, "step": 11396 }, { "epoch": 0.8023231256599789, "grad_norm": 1.9348132610321045, "learning_rate": 1.386444063735762e-06, "loss": 0.6129, "step": 11397 }, { "epoch": 0.802393523407251, "grad_norm": 1.903549313545227, "learning_rate": 1.3854906812607185e-06, "loss": 0.646, "step": 11398 }, { "epoch": 0.802463921154523, "grad_norm": 1.839813232421875, "learning_rate": 1.384537590689101e-06, "loss": 0.5642, "step": 11399 }, { "epoch": 0.8025343189017952, "grad_norm": 1.812215805053711, "learning_rate": 1.383584792070458e-06, "loss": 0.5978, "step": 11400 }, { "epoch": 0.8026047166490672, "grad_norm": 1.801242709159851, "learning_rate": 1.3826322854543299e-06, "loss": 0.5847, "step": 11401 }, { "epoch": 0.8026751143963393, "grad_norm": 1.7318964004516602, "learning_rate": 1.381680070890236e-06, "loss": 0.7605, "step": 11402 }, { "epoch": 0.8027455121436115, "grad_norm": 2.2511987686157227, "learning_rate": 1.3807281484276847e-06, "loss": 0.6879, "step": 11403 }, { "epoch": 0.8028159098908835, "grad_norm": 1.6148810386657715, "learning_rate": 1.3797765181161632e-06, "loss": 0.7123, "step": 11404 }, { "epoch": 0.8028863076381556, "grad_norm": 1.875288963317871, "learning_rate": 1.3788251800051532e-06, "loss": 0.6539, "step": 11405 }, { "epoch": 0.8029567053854276, "grad_norm": 1.9759695529937744, "learning_rate": 1.3778741341441107e-06, "loss": 0.7427, "step": 11406 }, { "epoch": 0.8030271031326998, "grad_norm": 2.0365777015686035, "learning_rate": 1.3769233805824855e-06, "loss": 0.6286, "step": 11407 }, { "epoch": 0.8030975008799719, "grad_norm": 2.034219264984131, "learning_rate": 1.375972919369706e-06, "loss": 0.7596, "step": 11408 }, { "epoch": 0.8031678986272439, "grad_norm": 1.7592711448669434, "learning_rate": 1.3750227505551885e-06, "loss": 0.576, "step": 11409 }, { "epoch": 0.803238296374516, "grad_norm": 1.8435781002044678, "learning_rate": 1.37407287418833e-06, "loss": 0.543, "step": 11410 }, { "epoch": 0.8033086941217881, "grad_norm": 1.8639764785766602, "learning_rate": 1.37312329031852e-06, "loss": 0.7287, "step": 11411 }, { "epoch": 0.8033790918690602, "grad_norm": 1.983073115348816, "learning_rate": 1.3721739989951245e-06, "loss": 0.7526, "step": 11412 }, { "epoch": 0.8034494896163322, "grad_norm": 2.3264198303222656, "learning_rate": 1.3712250002675014e-06, "loss": 0.6072, "step": 11413 }, { "epoch": 0.8035198873636044, "grad_norm": 1.6777223348617554, "learning_rate": 1.3702762941849876e-06, "loss": 0.6871, "step": 11414 }, { "epoch": 0.8035902851108765, "grad_norm": 2.6157119274139404, "learning_rate": 1.3693278807969086e-06, "loss": 0.7756, "step": 11415 }, { "epoch": 0.8036606828581485, "grad_norm": 2.08610200881958, "learning_rate": 1.368379760152571e-06, "loss": 0.565, "step": 11416 }, { "epoch": 0.8037310806054206, "grad_norm": 2.056652784347534, "learning_rate": 1.367431932301271e-06, "loss": 0.5624, "step": 11417 }, { "epoch": 0.8038014783526927, "grad_norm": 1.7646756172180176, "learning_rate": 1.3664843972922842e-06, "loss": 0.5946, "step": 11418 }, { "epoch": 0.8038718760999648, "grad_norm": 2.1260688304901123, "learning_rate": 1.3655371551748777e-06, "loss": 0.616, "step": 11419 }, { "epoch": 0.8039422738472369, "grad_norm": 1.9007340669631958, "learning_rate": 1.3645902059982978e-06, "loss": 0.6272, "step": 11420 }, { "epoch": 0.804012671594509, "grad_norm": 1.8246557712554932, "learning_rate": 1.3636435498117757e-06, "loss": 0.651, "step": 11421 }, { "epoch": 0.8040830693417811, "grad_norm": 1.8492770195007324, "learning_rate": 1.362697186664529e-06, "loss": 0.5674, "step": 11422 }, { "epoch": 0.8041534670890531, "grad_norm": 1.6899116039276123, "learning_rate": 1.3617511166057628e-06, "loss": 0.5827, "step": 11423 }, { "epoch": 0.8042238648363252, "grad_norm": 2.3094027042388916, "learning_rate": 1.3608053396846607e-06, "loss": 0.6563, "step": 11424 }, { "epoch": 0.8042942625835974, "grad_norm": 1.8328922986984253, "learning_rate": 1.359859855950397e-06, "loss": 0.5963, "step": 11425 }, { "epoch": 0.8043646603308694, "grad_norm": 1.6510944366455078, "learning_rate": 1.3589146654521286e-06, "loss": 0.6017, "step": 11426 }, { "epoch": 0.8044350580781415, "grad_norm": 2.0927212238311768, "learning_rate": 1.357969768238995e-06, "loss": 0.6628, "step": 11427 }, { "epoch": 0.8045054558254136, "grad_norm": 1.7044569253921509, "learning_rate": 1.3570251643601215e-06, "loss": 0.6275, "step": 11428 }, { "epoch": 0.8045758535726857, "grad_norm": 2.4702868461608887, "learning_rate": 1.3560808538646215e-06, "loss": 0.6939, "step": 11429 }, { "epoch": 0.8046462513199577, "grad_norm": 2.383164405822754, "learning_rate": 1.355136836801588e-06, "loss": 0.6439, "step": 11430 }, { "epoch": 0.8047166490672298, "grad_norm": 1.8779642581939697, "learning_rate": 1.3541931132201038e-06, "loss": 0.656, "step": 11431 }, { "epoch": 0.804787046814502, "grad_norm": 1.9936634302139282, "learning_rate": 1.3532496831692333e-06, "loss": 0.623, "step": 11432 }, { "epoch": 0.804857444561774, "grad_norm": 2.147031307220459, "learning_rate": 1.352306546698026e-06, "loss": 0.5383, "step": 11433 }, { "epoch": 0.8049278423090461, "grad_norm": 1.7955427169799805, "learning_rate": 1.3513637038555143e-06, "loss": 0.5768, "step": 11434 }, { "epoch": 0.8049982400563181, "grad_norm": 2.253312110900879, "learning_rate": 1.350421154690721e-06, "loss": 0.6595, "step": 11435 }, { "epoch": 0.8050686378035903, "grad_norm": 2.3037917613983154, "learning_rate": 1.349478899252646e-06, "loss": 0.6808, "step": 11436 }, { "epoch": 0.8051390355508624, "grad_norm": 2.014744758605957, "learning_rate": 1.3485369375902834e-06, "loss": 0.5962, "step": 11437 }, { "epoch": 0.8052094332981344, "grad_norm": 2.178278923034668, "learning_rate": 1.3475952697526024e-06, "loss": 0.7544, "step": 11438 }, { "epoch": 0.8052798310454066, "grad_norm": 1.9399526119232178, "learning_rate": 1.3466538957885639e-06, "loss": 0.6271, "step": 11439 }, { "epoch": 0.8053502287926786, "grad_norm": 2.0367653369903564, "learning_rate": 1.3457128157471067e-06, "loss": 0.6563, "step": 11440 }, { "epoch": 0.8054206265399507, "grad_norm": 1.9427365064620972, "learning_rate": 1.3447720296771608e-06, "loss": 0.5219, "step": 11441 }, { "epoch": 0.8054910242872229, "grad_norm": 2.148848295211792, "learning_rate": 1.3438315376276405e-06, "loss": 0.5478, "step": 11442 }, { "epoch": 0.8055614220344949, "grad_norm": 2.268916368484497, "learning_rate": 1.3428913396474403e-06, "loss": 0.6939, "step": 11443 }, { "epoch": 0.805631819781767, "grad_norm": 1.8224856853485107, "learning_rate": 1.3419514357854434e-06, "loss": 0.6956, "step": 11444 }, { "epoch": 0.805702217529039, "grad_norm": 2.1273529529571533, "learning_rate": 1.3410118260905161e-06, "loss": 0.6538, "step": 11445 }, { "epoch": 0.8057726152763112, "grad_norm": 1.4929358959197998, "learning_rate": 1.3400725106115092e-06, "loss": 0.7019, "step": 11446 }, { "epoch": 0.8058430130235833, "grad_norm": 1.9759856462478638, "learning_rate": 1.3391334893972564e-06, "loss": 0.5999, "step": 11447 }, { "epoch": 0.8059134107708553, "grad_norm": 1.8308696746826172, "learning_rate": 1.3381947624965824e-06, "loss": 0.5907, "step": 11448 }, { "epoch": 0.8059838085181275, "grad_norm": 2.0598270893096924, "learning_rate": 1.337256329958288e-06, "loss": 0.6294, "step": 11449 }, { "epoch": 0.8060542062653995, "grad_norm": 2.1272287368774414, "learning_rate": 1.3363181918311676e-06, "loss": 0.6817, "step": 11450 }, { "epoch": 0.8061246040126716, "grad_norm": 2.1811113357543945, "learning_rate": 1.3353803481639934e-06, "loss": 0.7422, "step": 11451 }, { "epoch": 0.8061950017599436, "grad_norm": 1.839769959449768, "learning_rate": 1.3344427990055256e-06, "loss": 0.6125, "step": 11452 }, { "epoch": 0.8062653995072158, "grad_norm": 1.9620177745819092, "learning_rate": 1.3335055444045053e-06, "loss": 0.6828, "step": 11453 }, { "epoch": 0.8063357972544879, "grad_norm": 2.1109323501586914, "learning_rate": 1.3325685844096661e-06, "loss": 0.5473, "step": 11454 }, { "epoch": 0.8064061950017599, "grad_norm": 2.0842626094818115, "learning_rate": 1.3316319190697163e-06, "loss": 0.7059, "step": 11455 }, { "epoch": 0.806476592749032, "grad_norm": 1.6074466705322266, "learning_rate": 1.330695548433359e-06, "loss": 0.6453, "step": 11456 }, { "epoch": 0.8065469904963041, "grad_norm": 1.6488196849822998, "learning_rate": 1.3297594725492747e-06, "loss": 0.6104, "step": 11457 }, { "epoch": 0.8066173882435762, "grad_norm": 2.2520194053649902, "learning_rate": 1.3288236914661304e-06, "loss": 0.5992, "step": 11458 }, { "epoch": 0.8066877859908483, "grad_norm": 1.990192174911499, "learning_rate": 1.3278882052325765e-06, "loss": 0.7122, "step": 11459 }, { "epoch": 0.8067581837381204, "grad_norm": 1.795461893081665, "learning_rate": 1.3269530138972543e-06, "loss": 0.7402, "step": 11460 }, { "epoch": 0.8068285814853925, "grad_norm": 1.9962430000305176, "learning_rate": 1.3260181175087806e-06, "loss": 0.6179, "step": 11461 }, { "epoch": 0.8068989792326645, "grad_norm": 1.5068672895431519, "learning_rate": 1.3250835161157646e-06, "loss": 0.5987, "step": 11462 }, { "epoch": 0.8069693769799366, "grad_norm": 1.779559850692749, "learning_rate": 1.324149209766797e-06, "loss": 0.6367, "step": 11463 }, { "epoch": 0.8070397747272088, "grad_norm": 2.0289785861968994, "learning_rate": 1.3232151985104519e-06, "loss": 0.5705, "step": 11464 }, { "epoch": 0.8071101724744808, "grad_norm": 1.816357970237732, "learning_rate": 1.3222814823952884e-06, "loss": 0.6196, "step": 11465 }, { "epoch": 0.8071805702217529, "grad_norm": 1.790949821472168, "learning_rate": 1.3213480614698542e-06, "loss": 0.6506, "step": 11466 }, { "epoch": 0.807250967969025, "grad_norm": 4.385681629180908, "learning_rate": 1.3204149357826756e-06, "loss": 0.7263, "step": 11467 }, { "epoch": 0.8073213657162971, "grad_norm": 1.7642340660095215, "learning_rate": 1.3194821053822702e-06, "loss": 0.6713, "step": 11468 }, { "epoch": 0.8073917634635691, "grad_norm": 1.6287602186203003, "learning_rate": 1.3185495703171342e-06, "loss": 0.7071, "step": 11469 }, { "epoch": 0.8074621612108412, "grad_norm": 2.1964635848999023, "learning_rate": 1.3176173306357514e-06, "loss": 0.6343, "step": 11470 }, { "epoch": 0.8075325589581134, "grad_norm": 1.7177718877792358, "learning_rate": 1.3166853863865892e-06, "loss": 0.6168, "step": 11471 }, { "epoch": 0.8076029567053854, "grad_norm": 1.8409374952316284, "learning_rate": 1.3157537376181015e-06, "loss": 0.6411, "step": 11472 }, { "epoch": 0.8076733544526575, "grad_norm": 1.9458372592926025, "learning_rate": 1.3148223843787237e-06, "loss": 0.5529, "step": 11473 }, { "epoch": 0.8077437521999296, "grad_norm": 2.0199294090270996, "learning_rate": 1.31389132671688e-06, "loss": 0.6097, "step": 11474 }, { "epoch": 0.8078141499472017, "grad_norm": 1.6371279954910278, "learning_rate": 1.3129605646809769e-06, "loss": 0.5084, "step": 11475 }, { "epoch": 0.8078845476944738, "grad_norm": 1.8041237592697144, "learning_rate": 1.3120300983194039e-06, "loss": 0.6503, "step": 11476 }, { "epoch": 0.8079549454417458, "grad_norm": 1.6346466541290283, "learning_rate": 1.3110999276805354e-06, "loss": 0.6256, "step": 11477 }, { "epoch": 0.808025343189018, "grad_norm": 1.9633437395095825, "learning_rate": 1.310170052812736e-06, "loss": 0.656, "step": 11478 }, { "epoch": 0.80809574093629, "grad_norm": 2.046372652053833, "learning_rate": 1.309240473764347e-06, "loss": 0.6121, "step": 11479 }, { "epoch": 0.8081661386835621, "grad_norm": 1.907213807106018, "learning_rate": 1.3083111905837014e-06, "loss": 0.6584, "step": 11480 }, { "epoch": 0.8082365364308343, "grad_norm": 1.8404332399368286, "learning_rate": 1.307382203319111e-06, "loss": 0.7598, "step": 11481 }, { "epoch": 0.8083069341781063, "grad_norm": 1.9596598148345947, "learning_rate": 1.3064535120188757e-06, "loss": 0.7075, "step": 11482 }, { "epoch": 0.8083773319253784, "grad_norm": 2.156439781188965, "learning_rate": 1.3055251167312775e-06, "loss": 0.6135, "step": 11483 }, { "epoch": 0.8084477296726504, "grad_norm": 2.032388210296631, "learning_rate": 1.3045970175045871e-06, "loss": 0.5828, "step": 11484 }, { "epoch": 0.8085181274199226, "grad_norm": 2.067199945449829, "learning_rate": 1.3036692143870536e-06, "loss": 0.679, "step": 11485 }, { "epoch": 0.8085885251671947, "grad_norm": 2.123072624206543, "learning_rate": 1.302741707426919e-06, "loss": 0.5967, "step": 11486 }, { "epoch": 0.8086589229144667, "grad_norm": 1.8231598138809204, "learning_rate": 1.3018144966724025e-06, "loss": 0.7078, "step": 11487 }, { "epoch": 0.8087293206617389, "grad_norm": 2.068263292312622, "learning_rate": 1.3008875821717107e-06, "loss": 0.6143, "step": 11488 }, { "epoch": 0.8087997184090109, "grad_norm": 1.8325581550598145, "learning_rate": 1.2999609639730331e-06, "loss": 0.5251, "step": 11489 }, { "epoch": 0.808870116156283, "grad_norm": 1.9576624631881714, "learning_rate": 1.2990346421245474e-06, "loss": 0.6397, "step": 11490 }, { "epoch": 0.808940513903555, "grad_norm": 1.6430970430374146, "learning_rate": 1.2981086166744153e-06, "loss": 0.6618, "step": 11491 }, { "epoch": 0.8090109116508272, "grad_norm": 1.6646099090576172, "learning_rate": 1.2971828876707812e-06, "loss": 0.6274, "step": 11492 }, { "epoch": 0.8090813093980993, "grad_norm": 2.3444411754608154, "learning_rate": 1.2962574551617724e-06, "loss": 0.7122, "step": 11493 }, { "epoch": 0.8091517071453713, "grad_norm": 2.1876556873321533, "learning_rate": 1.295332319195503e-06, "loss": 0.6196, "step": 11494 }, { "epoch": 0.8092221048926435, "grad_norm": 2.060347080230713, "learning_rate": 1.2944074798200742e-06, "loss": 0.576, "step": 11495 }, { "epoch": 0.8092925026399155, "grad_norm": 2.1362125873565674, "learning_rate": 1.2934829370835662e-06, "loss": 0.6234, "step": 11496 }, { "epoch": 0.8093629003871876, "grad_norm": 1.6987309455871582, "learning_rate": 1.29255869103405e-06, "loss": 0.595, "step": 11497 }, { "epoch": 0.8094332981344597, "grad_norm": 1.5392735004425049, "learning_rate": 1.2916347417195765e-06, "loss": 0.5785, "step": 11498 }, { "epoch": 0.8095036958817318, "grad_norm": 2.345473289489746, "learning_rate": 1.2907110891881826e-06, "loss": 0.6215, "step": 11499 }, { "epoch": 0.8095740936290039, "grad_norm": 1.8447591066360474, "learning_rate": 1.2897877334878876e-06, "loss": 0.663, "step": 11500 }, { "epoch": 0.8096444913762759, "grad_norm": 1.8799978494644165, "learning_rate": 1.2888646746667022e-06, "loss": 0.6877, "step": 11501 }, { "epoch": 0.809714889123548, "grad_norm": 1.9247381687164307, "learning_rate": 1.2879419127726126e-06, "loss": 0.6403, "step": 11502 }, { "epoch": 0.8097852868708202, "grad_norm": 3.595715284347534, "learning_rate": 1.2870194478535981e-06, "loss": 0.6744, "step": 11503 }, { "epoch": 0.8098556846180922, "grad_norm": 1.8586254119873047, "learning_rate": 1.2860972799576172e-06, "loss": 0.5837, "step": 11504 }, { "epoch": 0.8099260823653643, "grad_norm": 1.894490122795105, "learning_rate": 1.2851754091326122e-06, "loss": 0.7185, "step": 11505 }, { "epoch": 0.8099964801126364, "grad_norm": 1.869681477546692, "learning_rate": 1.284253835426512e-06, "loss": 0.6021, "step": 11506 }, { "epoch": 0.8100668778599085, "grad_norm": 2.0047333240509033, "learning_rate": 1.2833325588872333e-06, "loss": 0.755, "step": 11507 }, { "epoch": 0.8101372756071805, "grad_norm": 2.0629642009735107, "learning_rate": 1.2824115795626704e-06, "loss": 0.5478, "step": 11508 }, { "epoch": 0.8102076733544527, "grad_norm": 1.7991596460342407, "learning_rate": 1.2814908975007094e-06, "loss": 0.5634, "step": 11509 }, { "epoch": 0.8102780711017248, "grad_norm": 1.75753915309906, "learning_rate": 1.2805705127492153e-06, "loss": 0.6498, "step": 11510 }, { "epoch": 0.8103484688489968, "grad_norm": 1.738338828086853, "learning_rate": 1.2796504253560406e-06, "loss": 0.6032, "step": 11511 }, { "epoch": 0.8104188665962689, "grad_norm": 1.752328872680664, "learning_rate": 1.2787306353690188e-06, "loss": 0.6024, "step": 11512 }, { "epoch": 0.810489264343541, "grad_norm": 1.9830378293991089, "learning_rate": 1.277811142835975e-06, "loss": 0.6042, "step": 11513 }, { "epoch": 0.8105596620908131, "grad_norm": 2.1890339851379395, "learning_rate": 1.2768919478047098e-06, "loss": 0.603, "step": 11514 }, { "epoch": 0.8106300598380852, "grad_norm": 1.7706308364868164, "learning_rate": 1.2759730503230177e-06, "loss": 0.567, "step": 11515 }, { "epoch": 0.8107004575853572, "grad_norm": 1.9652307033538818, "learning_rate": 1.2750544504386703e-06, "loss": 0.6365, "step": 11516 }, { "epoch": 0.8107708553326294, "grad_norm": 1.6745883226394653, "learning_rate": 1.274136148199427e-06, "loss": 0.7114, "step": 11517 }, { "epoch": 0.8108412530799014, "grad_norm": 1.653090000152588, "learning_rate": 1.2732181436530294e-06, "loss": 0.7448, "step": 11518 }, { "epoch": 0.8109116508271735, "grad_norm": 1.911849856376648, "learning_rate": 1.2723004368472087e-06, "loss": 0.6709, "step": 11519 }, { "epoch": 0.8109820485744457, "grad_norm": 1.6923558712005615, "learning_rate": 1.271383027829673e-06, "loss": 0.6709, "step": 11520 }, { "epoch": 0.8110524463217177, "grad_norm": 2.108186721801758, "learning_rate": 1.2704659166481248e-06, "loss": 0.6089, "step": 11521 }, { "epoch": 0.8111228440689898, "grad_norm": 2.3524858951568604, "learning_rate": 1.2695491033502416e-06, "loss": 0.701, "step": 11522 }, { "epoch": 0.8111932418162618, "grad_norm": 2.10211181640625, "learning_rate": 1.268632587983691e-06, "loss": 0.6011, "step": 11523 }, { "epoch": 0.811263639563534, "grad_norm": 2.020643949508667, "learning_rate": 1.2677163705961204e-06, "loss": 0.7034, "step": 11524 }, { "epoch": 0.811334037310806, "grad_norm": 2.0158276557922363, "learning_rate": 1.26680045123517e-06, "loss": 0.7104, "step": 11525 }, { "epoch": 0.8114044350580781, "grad_norm": 1.8085435628890991, "learning_rate": 1.2658848299484537e-06, "loss": 0.5591, "step": 11526 }, { "epoch": 0.8114748328053503, "grad_norm": 3.403820276260376, "learning_rate": 1.2649695067835801e-06, "loss": 0.6539, "step": 11527 }, { "epoch": 0.8115452305526223, "grad_norm": 1.7470988035202026, "learning_rate": 1.2640544817881363e-06, "loss": 0.5611, "step": 11528 }, { "epoch": 0.8116156282998944, "grad_norm": 1.7625483274459839, "learning_rate": 1.263139755009694e-06, "loss": 0.6464, "step": 11529 }, { "epoch": 0.8116860260471664, "grad_norm": 2.4934773445129395, "learning_rate": 1.2622253264958107e-06, "loss": 0.7301, "step": 11530 }, { "epoch": 0.8117564237944386, "grad_norm": 2.111682176589966, "learning_rate": 1.2613111962940295e-06, "loss": 0.6534, "step": 11531 }, { "epoch": 0.8118268215417107, "grad_norm": 2.081493616104126, "learning_rate": 1.2603973644518762e-06, "loss": 0.6562, "step": 11532 }, { "epoch": 0.8118972192889827, "grad_norm": 2.2230446338653564, "learning_rate": 1.2594838310168626e-06, "loss": 0.65, "step": 11533 }, { "epoch": 0.8119676170362549, "grad_norm": 9.53791332244873, "learning_rate": 1.258570596036484e-06, "loss": 0.6742, "step": 11534 }, { "epoch": 0.8120380147835269, "grad_norm": 2.2004709243774414, "learning_rate": 1.25765765955822e-06, "loss": 0.531, "step": 11535 }, { "epoch": 0.812108412530799, "grad_norm": 2.046013832092285, "learning_rate": 1.2567450216295332e-06, "loss": 0.7765, "step": 11536 }, { "epoch": 0.8121788102780712, "grad_norm": 1.738359808921814, "learning_rate": 1.2558326822978757e-06, "loss": 0.6579, "step": 11537 }, { "epoch": 0.8122492080253432, "grad_norm": 1.7597792148590088, "learning_rate": 1.2549206416106767e-06, "loss": 0.6666, "step": 11538 }, { "epoch": 0.8123196057726153, "grad_norm": 1.6818815469741821, "learning_rate": 1.2540088996153599e-06, "loss": 0.6766, "step": 11539 }, { "epoch": 0.8123900035198873, "grad_norm": 1.7099568843841553, "learning_rate": 1.253097456359323e-06, "loss": 0.5297, "step": 11540 }, { "epoch": 0.8124604012671595, "grad_norm": 2.2062594890594482, "learning_rate": 1.2521863118899528e-06, "loss": 0.6868, "step": 11541 }, { "epoch": 0.8125307990144316, "grad_norm": 1.7982800006866455, "learning_rate": 1.2512754662546235e-06, "loss": 0.5958, "step": 11542 }, { "epoch": 0.8126011967617036, "grad_norm": 2.1656923294067383, "learning_rate": 1.2503649195006874e-06, "loss": 0.6736, "step": 11543 }, { "epoch": 0.8126715945089757, "grad_norm": 1.7081496715545654, "learning_rate": 1.249454671675488e-06, "loss": 0.6378, "step": 11544 }, { "epoch": 0.8127419922562478, "grad_norm": 1.7475199699401855, "learning_rate": 1.2485447228263465e-06, "loss": 0.6264, "step": 11545 }, { "epoch": 0.8128123900035199, "grad_norm": 1.8914319276809692, "learning_rate": 1.2476350730005753e-06, "loss": 0.6703, "step": 11546 }, { "epoch": 0.8128827877507919, "grad_norm": 1.6785622835159302, "learning_rate": 1.2467257222454665e-06, "loss": 0.6658, "step": 11547 }, { "epoch": 0.8129531854980641, "grad_norm": 2.1364166736602783, "learning_rate": 1.2458166706082973e-06, "loss": 0.7402, "step": 11548 }, { "epoch": 0.8130235832453362, "grad_norm": 2.2987780570983887, "learning_rate": 1.24490791813633e-06, "loss": 0.6955, "step": 11549 }, { "epoch": 0.8130939809926082, "grad_norm": 1.838543176651001, "learning_rate": 1.2439994648768128e-06, "loss": 0.5564, "step": 11550 }, { "epoch": 0.8131643787398803, "grad_norm": 2.079221248626709, "learning_rate": 1.2430913108769756e-06, "loss": 0.7025, "step": 11551 }, { "epoch": 0.8132347764871524, "grad_norm": 2.5970427989959717, "learning_rate": 1.2421834561840355e-06, "loss": 0.5461, "step": 11552 }, { "epoch": 0.8133051742344245, "grad_norm": 2.220283031463623, "learning_rate": 1.241275900845193e-06, "loss": 0.6992, "step": 11553 }, { "epoch": 0.8133755719816966, "grad_norm": 1.7730604410171509, "learning_rate": 1.2403686449076316e-06, "loss": 0.6347, "step": 11554 }, { "epoch": 0.8134459697289687, "grad_norm": 1.8959667682647705, "learning_rate": 1.2394616884185184e-06, "loss": 0.6419, "step": 11555 }, { "epoch": 0.8135163674762408, "grad_norm": 2.122929096221924, "learning_rate": 1.2385550314250116e-06, "loss": 0.6388, "step": 11556 }, { "epoch": 0.8135867652235128, "grad_norm": 1.9649137258529663, "learning_rate": 1.2376486739742445e-06, "loss": 0.6162, "step": 11557 }, { "epoch": 0.8136571629707849, "grad_norm": 2.0050625801086426, "learning_rate": 1.2367426161133438e-06, "loss": 0.6739, "step": 11558 }, { "epoch": 0.8137275607180571, "grad_norm": 1.7007052898406982, "learning_rate": 1.2358368578894132e-06, "loss": 0.6788, "step": 11559 }, { "epoch": 0.8137979584653291, "grad_norm": 1.9958778619766235, "learning_rate": 1.2349313993495452e-06, "loss": 0.6938, "step": 11560 }, { "epoch": 0.8138683562126012, "grad_norm": 1.8454171419143677, "learning_rate": 1.2340262405408128e-06, "loss": 0.6621, "step": 11561 }, { "epoch": 0.8139387539598733, "grad_norm": 1.7732059955596924, "learning_rate": 1.23312138151028e-06, "loss": 0.6974, "step": 11562 }, { "epoch": 0.8140091517071454, "grad_norm": 1.8272533416748047, "learning_rate": 1.2322168223049872e-06, "loss": 0.7026, "step": 11563 }, { "epoch": 0.8140795494544174, "grad_norm": 1.7943578958511353, "learning_rate": 1.2313125629719678e-06, "loss": 0.696, "step": 11564 }, { "epoch": 0.8141499472016895, "grad_norm": 2.223125457763672, "learning_rate": 1.2304086035582327e-06, "loss": 0.609, "step": 11565 }, { "epoch": 0.8142203449489617, "grad_norm": 1.8067569732666016, "learning_rate": 1.2295049441107792e-06, "loss": 0.5971, "step": 11566 }, { "epoch": 0.8142907426962337, "grad_norm": 2.025108575820923, "learning_rate": 1.2286015846765883e-06, "loss": 0.6255, "step": 11567 }, { "epoch": 0.8143611404435058, "grad_norm": 1.8220270872116089, "learning_rate": 1.2276985253026292e-06, "loss": 0.6164, "step": 11568 }, { "epoch": 0.8144315381907778, "grad_norm": 2.289609670639038, "learning_rate": 1.22679576603585e-06, "loss": 0.65, "step": 11569 }, { "epoch": 0.81450193593805, "grad_norm": 2.29123592376709, "learning_rate": 1.225893306923189e-06, "loss": 0.7178, "step": 11570 }, { "epoch": 0.8145723336853221, "grad_norm": 2.010111093521118, "learning_rate": 1.2249911480115643e-06, "loss": 0.6918, "step": 11571 }, { "epoch": 0.8146427314325941, "grad_norm": 2.303853988647461, "learning_rate": 1.2240892893478803e-06, "loss": 0.7328, "step": 11572 }, { "epoch": 0.8147131291798663, "grad_norm": 1.9864588975906372, "learning_rate": 1.2231877309790235e-06, "loss": 0.7104, "step": 11573 }, { "epoch": 0.8147835269271383, "grad_norm": 1.9669435024261475, "learning_rate": 1.2222864729518696e-06, "loss": 0.5577, "step": 11574 }, { "epoch": 0.8148539246744104, "grad_norm": 2.230881452560425, "learning_rate": 1.2213855153132734e-06, "loss": 0.6005, "step": 11575 }, { "epoch": 0.8149243224216826, "grad_norm": 2.199880838394165, "learning_rate": 1.2204848581100786e-06, "loss": 0.5744, "step": 11576 }, { "epoch": 0.8149947201689546, "grad_norm": 2.576234817504883, "learning_rate": 1.2195845013891108e-06, "loss": 0.7194, "step": 11577 }, { "epoch": 0.8150651179162267, "grad_norm": 2.048187732696533, "learning_rate": 1.21868444519718e-06, "loss": 0.6869, "step": 11578 }, { "epoch": 0.8151355156634987, "grad_norm": 1.7232043743133545, "learning_rate": 1.2177846895810784e-06, "loss": 0.6313, "step": 11579 }, { "epoch": 0.8152059134107709, "grad_norm": 2.1057658195495605, "learning_rate": 1.2168852345875897e-06, "loss": 0.6445, "step": 11580 }, { "epoch": 0.8152763111580429, "grad_norm": 1.8547859191894531, "learning_rate": 1.2159860802634734e-06, "loss": 0.6581, "step": 11581 }, { "epoch": 0.815346708905315, "grad_norm": 2.4711365699768066, "learning_rate": 1.2150872266554812e-06, "loss": 0.6945, "step": 11582 }, { "epoch": 0.8154171066525872, "grad_norm": 2.2204113006591797, "learning_rate": 1.2141886738103423e-06, "loss": 0.6832, "step": 11583 }, { "epoch": 0.8154875043998592, "grad_norm": 1.8834255933761597, "learning_rate": 1.2132904217747744e-06, "loss": 0.5746, "step": 11584 }, { "epoch": 0.8155579021471313, "grad_norm": 1.839613676071167, "learning_rate": 1.2123924705954771e-06, "loss": 0.6089, "step": 11585 }, { "epoch": 0.8156282998944033, "grad_norm": 1.8236157894134521, "learning_rate": 1.2114948203191385e-06, "loss": 0.6159, "step": 11586 }, { "epoch": 0.8156986976416755, "grad_norm": 2.053830623626709, "learning_rate": 1.210597470992424e-06, "loss": 0.5938, "step": 11587 }, { "epoch": 0.8157690953889476, "grad_norm": 1.770499348640442, "learning_rate": 1.2097004226619925e-06, "loss": 0.6286, "step": 11588 }, { "epoch": 0.8158394931362196, "grad_norm": 1.8678058385849, "learning_rate": 1.2088036753744801e-06, "loss": 0.6796, "step": 11589 }, { "epoch": 0.8159098908834918, "grad_norm": 2.9591104984283447, "learning_rate": 1.2079072291765096e-06, "loss": 0.6972, "step": 11590 }, { "epoch": 0.8159802886307638, "grad_norm": 2.063809633255005, "learning_rate": 1.2070110841146857e-06, "loss": 0.6629, "step": 11591 }, { "epoch": 0.8160506863780359, "grad_norm": 2.6836938858032227, "learning_rate": 1.2061152402356024e-06, "loss": 0.7265, "step": 11592 }, { "epoch": 0.816121084125308, "grad_norm": 2.451695203781128, "learning_rate": 1.2052196975858363e-06, "loss": 0.6906, "step": 11593 }, { "epoch": 0.8161914818725801, "grad_norm": 2.039898157119751, "learning_rate": 1.2043244562119465e-06, "loss": 0.6292, "step": 11594 }, { "epoch": 0.8162618796198522, "grad_norm": 1.7124994993209839, "learning_rate": 1.203429516160477e-06, "loss": 0.6843, "step": 11595 }, { "epoch": 0.8163322773671242, "grad_norm": 1.8479670286178589, "learning_rate": 1.2025348774779548e-06, "loss": 0.601, "step": 11596 }, { "epoch": 0.8164026751143963, "grad_norm": 2.4289841651916504, "learning_rate": 1.2016405402108963e-06, "loss": 0.6458, "step": 11597 }, { "epoch": 0.8164730728616685, "grad_norm": 2.0979530811309814, "learning_rate": 1.2007465044057958e-06, "loss": 0.6587, "step": 11598 }, { "epoch": 0.8165434706089405, "grad_norm": 1.8725982904434204, "learning_rate": 1.1998527701091388e-06, "loss": 0.572, "step": 11599 }, { "epoch": 0.8166138683562126, "grad_norm": 2.0733070373535156, "learning_rate": 1.1989593373673894e-06, "loss": 0.5965, "step": 11600 }, { "epoch": 0.8166842661034847, "grad_norm": 1.7516764402389526, "learning_rate": 1.198066206226997e-06, "loss": 0.5996, "step": 11601 }, { "epoch": 0.8167546638507568, "grad_norm": 2.3231043815612793, "learning_rate": 1.1971733767343961e-06, "loss": 0.6427, "step": 11602 }, { "epoch": 0.8168250615980288, "grad_norm": 1.9291763305664062, "learning_rate": 1.196280848936008e-06, "loss": 0.6075, "step": 11603 }, { "epoch": 0.816895459345301, "grad_norm": 1.9735145568847656, "learning_rate": 1.1953886228782324e-06, "loss": 0.5452, "step": 11604 }, { "epoch": 0.8169658570925731, "grad_norm": 1.5139546394348145, "learning_rate": 1.194496698607462e-06, "loss": 0.5184, "step": 11605 }, { "epoch": 0.8170362548398451, "grad_norm": 1.8908846378326416, "learning_rate": 1.1936050761700657e-06, "loss": 0.5399, "step": 11606 }, { "epoch": 0.8171066525871172, "grad_norm": 2.7195045948028564, "learning_rate": 1.1927137556123998e-06, "loss": 0.703, "step": 11607 }, { "epoch": 0.8171770503343893, "grad_norm": 1.942753553390503, "learning_rate": 1.1918227369808034e-06, "loss": 0.6607, "step": 11608 }, { "epoch": 0.8172474480816614, "grad_norm": 1.911805272102356, "learning_rate": 1.190932020321605e-06, "loss": 0.5877, "step": 11609 }, { "epoch": 0.8173178458289335, "grad_norm": 1.8059594631195068, "learning_rate": 1.1900416056811107e-06, "loss": 0.6613, "step": 11610 }, { "epoch": 0.8173882435762055, "grad_norm": 1.9994440078735352, "learning_rate": 1.1891514931056164e-06, "loss": 0.6848, "step": 11611 }, { "epoch": 0.8174586413234777, "grad_norm": 2.3009605407714844, "learning_rate": 1.1882616826413988e-06, "loss": 0.6667, "step": 11612 }, { "epoch": 0.8175290390707497, "grad_norm": 1.8259732723236084, "learning_rate": 1.1873721743347194e-06, "loss": 0.614, "step": 11613 }, { "epoch": 0.8175994368180218, "grad_norm": 2.161604881286621, "learning_rate": 1.1864829682318228e-06, "loss": 0.6824, "step": 11614 }, { "epoch": 0.817669834565294, "grad_norm": 1.6888447999954224, "learning_rate": 1.185594064378944e-06, "loss": 0.651, "step": 11615 }, { "epoch": 0.817740232312566, "grad_norm": 1.9729787111282349, "learning_rate": 1.1847054628222938e-06, "loss": 0.6977, "step": 11616 }, { "epoch": 0.8178106300598381, "grad_norm": 1.8046672344207764, "learning_rate": 1.1838171636080747e-06, "loss": 0.6296, "step": 11617 }, { "epoch": 0.8178810278071101, "grad_norm": 1.612300157546997, "learning_rate": 1.1829291667824694e-06, "loss": 0.6065, "step": 11618 }, { "epoch": 0.8179514255543823, "grad_norm": 2.0749127864837646, "learning_rate": 1.1820414723916443e-06, "loss": 0.6673, "step": 11619 }, { "epoch": 0.8180218233016543, "grad_norm": 2.0135419368743896, "learning_rate": 1.18115408048175e-06, "loss": 0.6853, "step": 11620 }, { "epoch": 0.8180922210489264, "grad_norm": 1.7360681295394897, "learning_rate": 1.1802669910989281e-06, "loss": 0.6558, "step": 11621 }, { "epoch": 0.8181626187961986, "grad_norm": 1.9508566856384277, "learning_rate": 1.1793802042892932e-06, "loss": 0.5821, "step": 11622 }, { "epoch": 0.8182330165434706, "grad_norm": 1.995926022529602, "learning_rate": 1.1784937200989547e-06, "loss": 0.6742, "step": 11623 }, { "epoch": 0.8183034142907427, "grad_norm": 2.918591260910034, "learning_rate": 1.1776075385739997e-06, "loss": 0.673, "step": 11624 }, { "epoch": 0.8183738120380147, "grad_norm": 1.7965044975280762, "learning_rate": 1.1767216597605026e-06, "loss": 0.5724, "step": 11625 }, { "epoch": 0.8184442097852869, "grad_norm": 2.1743597984313965, "learning_rate": 1.1758360837045185e-06, "loss": 0.5886, "step": 11626 }, { "epoch": 0.818514607532559, "grad_norm": 1.8178746700286865, "learning_rate": 1.1749508104520922e-06, "loss": 0.609, "step": 11627 }, { "epoch": 0.818585005279831, "grad_norm": 1.6995210647583008, "learning_rate": 1.1740658400492478e-06, "loss": 0.7302, "step": 11628 }, { "epoch": 0.8186554030271032, "grad_norm": 2.1178205013275146, "learning_rate": 1.1731811725419977e-06, "loss": 0.613, "step": 11629 }, { "epoch": 0.8187258007743752, "grad_norm": 1.94413423538208, "learning_rate": 1.1722968079763359e-06, "loss": 0.6067, "step": 11630 }, { "epoch": 0.8187961985216473, "grad_norm": 1.8624835014343262, "learning_rate": 1.1714127463982402e-06, "loss": 0.6417, "step": 11631 }, { "epoch": 0.8188665962689194, "grad_norm": 2.076371908187866, "learning_rate": 1.1705289878536738e-06, "loss": 0.6794, "step": 11632 }, { "epoch": 0.8189369940161915, "grad_norm": 1.947147250175476, "learning_rate": 1.1696455323885859e-06, "loss": 0.6276, "step": 11633 }, { "epoch": 0.8190073917634636, "grad_norm": 1.870434045791626, "learning_rate": 1.1687623800489058e-06, "loss": 0.5995, "step": 11634 }, { "epoch": 0.8190777895107356, "grad_norm": 1.5979231595993042, "learning_rate": 1.1678795308805519e-06, "loss": 0.689, "step": 11635 }, { "epoch": 0.8191481872580078, "grad_norm": 1.909206748008728, "learning_rate": 1.1669969849294235e-06, "loss": 0.597, "step": 11636 }, { "epoch": 0.8192185850052798, "grad_norm": 1.7351129055023193, "learning_rate": 1.166114742241404e-06, "loss": 0.633, "step": 11637 }, { "epoch": 0.8192889827525519, "grad_norm": 1.86285400390625, "learning_rate": 1.1652328028623607e-06, "loss": 0.5571, "step": 11638 }, { "epoch": 0.819359380499824, "grad_norm": 1.7373600006103516, "learning_rate": 1.1643511668381507e-06, "loss": 0.5931, "step": 11639 }, { "epoch": 0.8194297782470961, "grad_norm": 1.9270728826522827, "learning_rate": 1.1634698342146064e-06, "loss": 0.62, "step": 11640 }, { "epoch": 0.8195001759943682, "grad_norm": 1.7736799716949463, "learning_rate": 1.1625888050375511e-06, "loss": 0.6406, "step": 11641 }, { "epoch": 0.8195705737416402, "grad_norm": 1.9107613563537598, "learning_rate": 1.1617080793527945e-06, "loss": 0.6347, "step": 11642 }, { "epoch": 0.8196409714889124, "grad_norm": 1.8354227542877197, "learning_rate": 1.1608276572061185e-06, "loss": 0.6086, "step": 11643 }, { "epoch": 0.8197113692361845, "grad_norm": 1.9200222492218018, "learning_rate": 1.1599475386433033e-06, "loss": 0.5529, "step": 11644 }, { "epoch": 0.8197817669834565, "grad_norm": 1.9411100149154663, "learning_rate": 1.1590677237101024e-06, "loss": 0.7736, "step": 11645 }, { "epoch": 0.8198521647307286, "grad_norm": 1.8973581790924072, "learning_rate": 1.158188212452262e-06, "loss": 0.7894, "step": 11646 }, { "epoch": 0.8199225624780007, "grad_norm": 2.3564324378967285, "learning_rate": 1.157309004915505e-06, "loss": 0.6243, "step": 11647 }, { "epoch": 0.8199929602252728, "grad_norm": 2.3475093841552734, "learning_rate": 1.156430101145548e-06, "loss": 0.7066, "step": 11648 }, { "epoch": 0.8200633579725449, "grad_norm": 2.0157594680786133, "learning_rate": 1.1555515011880796e-06, "loss": 0.6768, "step": 11649 }, { "epoch": 0.820133755719817, "grad_norm": 1.9498379230499268, "learning_rate": 1.154673205088783e-06, "loss": 0.6627, "step": 11650 }, { "epoch": 0.8202041534670891, "grad_norm": 1.7277573347091675, "learning_rate": 1.1537952128933179e-06, "loss": 0.6338, "step": 11651 }, { "epoch": 0.8202745512143611, "grad_norm": 1.9233089685440063, "learning_rate": 1.1529175246473367e-06, "loss": 0.7906, "step": 11652 }, { "epoch": 0.8203449489616332, "grad_norm": 1.9683926105499268, "learning_rate": 1.152040140396467e-06, "loss": 0.5872, "step": 11653 }, { "epoch": 0.8204153467089054, "grad_norm": 2.1079487800598145, "learning_rate": 1.1511630601863285e-06, "loss": 0.5552, "step": 11654 }, { "epoch": 0.8204857444561774, "grad_norm": 2.3553500175476074, "learning_rate": 1.150286284062519e-06, "loss": 0.5477, "step": 11655 }, { "epoch": 0.8205561422034495, "grad_norm": 2.0819571018218994, "learning_rate": 1.1494098120706237e-06, "loss": 0.6941, "step": 11656 }, { "epoch": 0.8206265399507215, "grad_norm": 2.000378131866455, "learning_rate": 1.1485336442562096e-06, "loss": 0.7107, "step": 11657 }, { "epoch": 0.8206969376979937, "grad_norm": 2.426657199859619, "learning_rate": 1.1476577806648323e-06, "loss": 0.6392, "step": 11658 }, { "epoch": 0.8207673354452657, "grad_norm": 2.0434834957122803, "learning_rate": 1.146782221342025e-06, "loss": 0.7524, "step": 11659 }, { "epoch": 0.8208377331925378, "grad_norm": 2.2999584674835205, "learning_rate": 1.1459069663333125e-06, "loss": 0.7193, "step": 11660 }, { "epoch": 0.82090813093981, "grad_norm": 1.9935555458068848, "learning_rate": 1.1450320156841992e-06, "loss": 0.7098, "step": 11661 }, { "epoch": 0.820978528687082, "grad_norm": 2.2103447914123535, "learning_rate": 1.1441573694401743e-06, "loss": 0.6342, "step": 11662 }, { "epoch": 0.8210489264343541, "grad_norm": 1.6640064716339111, "learning_rate": 1.1432830276467087e-06, "loss": 0.644, "step": 11663 }, { "epoch": 0.8211193241816261, "grad_norm": 2.4066553115844727, "learning_rate": 1.1424089903492644e-06, "loss": 0.7027, "step": 11664 }, { "epoch": 0.8211897219288983, "grad_norm": 1.819202184677124, "learning_rate": 1.1415352575932802e-06, "loss": 0.5361, "step": 11665 }, { "epoch": 0.8212601196761704, "grad_norm": 2.066023111343384, "learning_rate": 1.1406618294241855e-06, "loss": 0.5837, "step": 11666 }, { "epoch": 0.8213305174234424, "grad_norm": 1.765192985534668, "learning_rate": 1.1397887058873886e-06, "loss": 0.7189, "step": 11667 }, { "epoch": 0.8214009151707146, "grad_norm": 1.9115898609161377, "learning_rate": 1.1389158870282843e-06, "loss": 0.6384, "step": 11668 }, { "epoch": 0.8214713129179866, "grad_norm": 1.8058311939239502, "learning_rate": 1.1380433728922488e-06, "loss": 0.5812, "step": 11669 }, { "epoch": 0.8215417106652587, "grad_norm": 1.7405433654785156, "learning_rate": 1.1371711635246497e-06, "loss": 0.5849, "step": 11670 }, { "epoch": 0.8216121084125309, "grad_norm": 2.5721728801727295, "learning_rate": 1.13629925897083e-06, "loss": 0.6882, "step": 11671 }, { "epoch": 0.8216825061598029, "grad_norm": 2.3350727558135986, "learning_rate": 1.1354276592761237e-06, "loss": 0.6713, "step": 11672 }, { "epoch": 0.821752903907075, "grad_norm": 1.6434913873672485, "learning_rate": 1.1345563644858441e-06, "loss": 0.5789, "step": 11673 }, { "epoch": 0.821823301654347, "grad_norm": 2.236314535140991, "learning_rate": 1.133685374645292e-06, "loss": 0.6318, "step": 11674 }, { "epoch": 0.8218936994016192, "grad_norm": 1.7385880947113037, "learning_rate": 1.1328146897997482e-06, "loss": 0.6133, "step": 11675 }, { "epoch": 0.8219640971488912, "grad_norm": 1.7876893281936646, "learning_rate": 1.1319443099944838e-06, "loss": 0.7381, "step": 11676 }, { "epoch": 0.8220344948961633, "grad_norm": 2.2851662635803223, "learning_rate": 1.1310742352747477e-06, "loss": 0.611, "step": 11677 }, { "epoch": 0.8221048926434354, "grad_norm": 1.958611249923706, "learning_rate": 1.1302044656857782e-06, "loss": 0.6998, "step": 11678 }, { "epoch": 0.8221752903907075, "grad_norm": 2.0010173320770264, "learning_rate": 1.1293350012727958e-06, "loss": 0.6683, "step": 11679 }, { "epoch": 0.8222456881379796, "grad_norm": 1.941108226776123, "learning_rate": 1.1284658420810029e-06, "loss": 0.6569, "step": 11680 }, { "epoch": 0.8223160858852516, "grad_norm": 1.8926050662994385, "learning_rate": 1.1275969881555867e-06, "loss": 0.6688, "step": 11681 }, { "epoch": 0.8223864836325238, "grad_norm": 1.9495124816894531, "learning_rate": 1.1267284395417228e-06, "loss": 0.6526, "step": 11682 }, { "epoch": 0.8224568813797959, "grad_norm": 1.7502614259719849, "learning_rate": 1.1258601962845653e-06, "loss": 0.6236, "step": 11683 }, { "epoch": 0.8225272791270679, "grad_norm": 2.134036064147949, "learning_rate": 1.124992258429258e-06, "loss": 0.6456, "step": 11684 }, { "epoch": 0.82259767687434, "grad_norm": 1.8293125629425049, "learning_rate": 1.1241246260209235e-06, "loss": 0.5512, "step": 11685 }, { "epoch": 0.8226680746216121, "grad_norm": 2.217392683029175, "learning_rate": 1.1232572991046712e-06, "loss": 0.6007, "step": 11686 }, { "epoch": 0.8227384723688842, "grad_norm": 1.95255708694458, "learning_rate": 1.1223902777255924e-06, "loss": 0.6724, "step": 11687 }, { "epoch": 0.8228088701161563, "grad_norm": 1.8877983093261719, "learning_rate": 1.121523561928768e-06, "loss": 0.7683, "step": 11688 }, { "epoch": 0.8228792678634284, "grad_norm": 1.9690465927124023, "learning_rate": 1.1206571517592558e-06, "loss": 0.673, "step": 11689 }, { "epoch": 0.8229496656107005, "grad_norm": 2.526233673095703, "learning_rate": 1.119791047262105e-06, "loss": 0.692, "step": 11690 }, { "epoch": 0.8230200633579725, "grad_norm": 1.7170294523239136, "learning_rate": 1.1189252484823422e-06, "loss": 0.6456, "step": 11691 }, { "epoch": 0.8230904611052446, "grad_norm": 1.7564340829849243, "learning_rate": 1.1180597554649812e-06, "loss": 0.6241, "step": 11692 }, { "epoch": 0.8231608588525168, "grad_norm": 1.7787857055664062, "learning_rate": 1.1171945682550218e-06, "loss": 0.5711, "step": 11693 }, { "epoch": 0.8232312565997888, "grad_norm": 2.1169326305389404, "learning_rate": 1.1163296868974425e-06, "loss": 0.6566, "step": 11694 }, { "epoch": 0.8233016543470609, "grad_norm": 2.73169207572937, "learning_rate": 1.1154651114372135e-06, "loss": 0.7159, "step": 11695 }, { "epoch": 0.823372052094333, "grad_norm": 2.1130974292755127, "learning_rate": 1.1146008419192826e-06, "loss": 0.6504, "step": 11696 }, { "epoch": 0.8234424498416051, "grad_norm": 2.0473294258117676, "learning_rate": 1.113736878388584e-06, "loss": 0.7201, "step": 11697 }, { "epoch": 0.8235128475888771, "grad_norm": 1.770151138305664, "learning_rate": 1.1128732208900343e-06, "loss": 0.658, "step": 11698 }, { "epoch": 0.8235832453361492, "grad_norm": 2.0844058990478516, "learning_rate": 1.1120098694685397e-06, "loss": 0.7483, "step": 11699 }, { "epoch": 0.8236536430834214, "grad_norm": 2.0410683155059814, "learning_rate": 1.111146824168982e-06, "loss": 0.7685, "step": 11700 }, { "epoch": 0.8237240408306934, "grad_norm": 2.266251802444458, "learning_rate": 1.1102840850362364e-06, "loss": 0.5886, "step": 11701 }, { "epoch": 0.8237944385779655, "grad_norm": 1.7778346538543701, "learning_rate": 1.1094216521151549e-06, "loss": 0.5778, "step": 11702 }, { "epoch": 0.8238648363252375, "grad_norm": 1.8601349592208862, "learning_rate": 1.1085595254505772e-06, "loss": 0.6656, "step": 11703 }, { "epoch": 0.8239352340725097, "grad_norm": 2.3582122325897217, "learning_rate": 1.1076977050873226e-06, "loss": 0.6299, "step": 11704 }, { "epoch": 0.8240056318197818, "grad_norm": 1.8820184469223022, "learning_rate": 1.1068361910702034e-06, "loss": 0.6782, "step": 11705 }, { "epoch": 0.8240760295670538, "grad_norm": 1.8860328197479248, "learning_rate": 1.1059749834440056e-06, "loss": 0.6203, "step": 11706 }, { "epoch": 0.824146427314326, "grad_norm": 2.159929037094116, "learning_rate": 1.1051140822535081e-06, "loss": 0.6898, "step": 11707 }, { "epoch": 0.824216825061598, "grad_norm": 2.3911125659942627, "learning_rate": 1.1042534875434683e-06, "loss": 0.6865, "step": 11708 }, { "epoch": 0.8242872228088701, "grad_norm": 2.02652907371521, "learning_rate": 1.1033931993586292e-06, "loss": 0.7102, "step": 11709 }, { "epoch": 0.8243576205561423, "grad_norm": 1.9419628381729126, "learning_rate": 1.1025332177437162e-06, "loss": 0.6203, "step": 11710 }, { "epoch": 0.8244280183034143, "grad_norm": 1.7641956806182861, "learning_rate": 1.101673542743444e-06, "loss": 0.7279, "step": 11711 }, { "epoch": 0.8244984160506864, "grad_norm": 1.7892591953277588, "learning_rate": 1.100814174402504e-06, "loss": 0.6101, "step": 11712 }, { "epoch": 0.8245688137979584, "grad_norm": 1.9028633832931519, "learning_rate": 1.0999551127655798e-06, "loss": 0.612, "step": 11713 }, { "epoch": 0.8246392115452306, "grad_norm": 1.7455382347106934, "learning_rate": 1.0990963578773332e-06, "loss": 0.672, "step": 11714 }, { "epoch": 0.8247096092925026, "grad_norm": 2.082840919494629, "learning_rate": 1.0982379097824102e-06, "loss": 0.5868, "step": 11715 }, { "epoch": 0.8247800070397747, "grad_norm": 1.9009650945663452, "learning_rate": 1.097379768525442e-06, "loss": 0.6589, "step": 11716 }, { "epoch": 0.8248504047870469, "grad_norm": 2.271049737930298, "learning_rate": 1.0965219341510473e-06, "loss": 0.667, "step": 11717 }, { "epoch": 0.8249208025343189, "grad_norm": 2.0151166915893555, "learning_rate": 1.0956644067038213e-06, "loss": 0.6531, "step": 11718 }, { "epoch": 0.824991200281591, "grad_norm": 1.9394114017486572, "learning_rate": 1.0948071862283521e-06, "loss": 0.6799, "step": 11719 }, { "epoch": 0.825061598028863, "grad_norm": 1.938377857208252, "learning_rate": 1.0939502727692061e-06, "loss": 0.6703, "step": 11720 }, { "epoch": 0.8251319957761352, "grad_norm": 2.030181646347046, "learning_rate": 1.0930936663709336e-06, "loss": 0.6156, "step": 11721 }, { "epoch": 0.8252023935234073, "grad_norm": 1.8705270290374756, "learning_rate": 1.0922373670780694e-06, "loss": 0.5955, "step": 11722 }, { "epoch": 0.8252727912706793, "grad_norm": 2.1540346145629883, "learning_rate": 1.0913813749351363e-06, "loss": 0.5968, "step": 11723 }, { "epoch": 0.8253431890179515, "grad_norm": 2.24743390083313, "learning_rate": 1.0905256899866347e-06, "loss": 0.6759, "step": 11724 }, { "epoch": 0.8254135867652235, "grad_norm": 2.3311219215393066, "learning_rate": 1.089670312277057e-06, "loss": 0.7778, "step": 11725 }, { "epoch": 0.8254839845124956, "grad_norm": 1.687445878982544, "learning_rate": 1.088815241850872e-06, "loss": 0.5577, "step": 11726 }, { "epoch": 0.8255543822597677, "grad_norm": 2.0636613368988037, "learning_rate": 1.0879604787525363e-06, "loss": 0.6343, "step": 11727 }, { "epoch": 0.8256247800070398, "grad_norm": 1.8136405944824219, "learning_rate": 1.0871060230264874e-06, "loss": 0.7201, "step": 11728 }, { "epoch": 0.8256951777543119, "grad_norm": 2.0409340858459473, "learning_rate": 1.0862518747171534e-06, "loss": 0.6081, "step": 11729 }, { "epoch": 0.8257655755015839, "grad_norm": 1.9709153175354004, "learning_rate": 1.0853980338689383e-06, "loss": 0.7412, "step": 11730 }, { "epoch": 0.825835973248856, "grad_norm": 1.6567955017089844, "learning_rate": 1.0845445005262379e-06, "loss": 0.7809, "step": 11731 }, { "epoch": 0.8259063709961281, "grad_norm": 2.1562483310699463, "learning_rate": 1.0836912747334263e-06, "loss": 0.6113, "step": 11732 }, { "epoch": 0.8259767687434002, "grad_norm": 2.5700173377990723, "learning_rate": 1.0828383565348632e-06, "loss": 0.6249, "step": 11733 }, { "epoch": 0.8260471664906723, "grad_norm": 2.006458282470703, "learning_rate": 1.0819857459748918e-06, "loss": 0.7791, "step": 11734 }, { "epoch": 0.8261175642379444, "grad_norm": 2.227231025695801, "learning_rate": 1.0811334430978422e-06, "loss": 0.7395, "step": 11735 }, { "epoch": 0.8261879619852165, "grad_norm": 1.770276427268982, "learning_rate": 1.0802814479480238e-06, "loss": 0.6748, "step": 11736 }, { "epoch": 0.8262583597324885, "grad_norm": 1.8572019338607788, "learning_rate": 1.0794297605697361e-06, "loss": 0.6397, "step": 11737 }, { "epoch": 0.8263287574797606, "grad_norm": 2.276538133621216, "learning_rate": 1.078578381007257e-06, "loss": 0.6357, "step": 11738 }, { "epoch": 0.8263991552270328, "grad_norm": 1.793267846107483, "learning_rate": 1.0777273093048505e-06, "loss": 0.6101, "step": 11739 }, { "epoch": 0.8264695529743048, "grad_norm": 2.0969796180725098, "learning_rate": 1.0768765455067628e-06, "loss": 0.6176, "step": 11740 }, { "epoch": 0.8265399507215769, "grad_norm": 2.058298110961914, "learning_rate": 1.0760260896572296e-06, "loss": 0.64, "step": 11741 }, { "epoch": 0.826610348468849, "grad_norm": 2.0297250747680664, "learning_rate": 1.0751759418004639e-06, "loss": 0.6627, "step": 11742 }, { "epoch": 0.8266807462161211, "grad_norm": 2.083895444869995, "learning_rate": 1.0743261019806667e-06, "loss": 0.6822, "step": 11743 }, { "epoch": 0.8267511439633932, "grad_norm": 1.9134571552276611, "learning_rate": 1.0734765702420245e-06, "loss": 0.7134, "step": 11744 }, { "epoch": 0.8268215417106652, "grad_norm": 1.8997936248779297, "learning_rate": 1.0726273466287003e-06, "loss": 0.65, "step": 11745 }, { "epoch": 0.8268919394579374, "grad_norm": 1.7747695446014404, "learning_rate": 1.0717784311848502e-06, "loss": 0.6229, "step": 11746 }, { "epoch": 0.8269623372052094, "grad_norm": 1.8545395135879517, "learning_rate": 1.0709298239546062e-06, "loss": 0.7627, "step": 11747 }, { "epoch": 0.8270327349524815, "grad_norm": 1.9362428188323975, "learning_rate": 1.0700815249820924e-06, "loss": 0.6737, "step": 11748 }, { "epoch": 0.8271031326997537, "grad_norm": 4.833147048950195, "learning_rate": 1.0692335343114089e-06, "loss": 0.5995, "step": 11749 }, { "epoch": 0.8271735304470257, "grad_norm": 1.9159475564956665, "learning_rate": 1.0683858519866482e-06, "loss": 0.5766, "step": 11750 }, { "epoch": 0.8272439281942978, "grad_norm": 2.1874923706054688, "learning_rate": 1.067538478051876e-06, "loss": 0.7052, "step": 11751 }, { "epoch": 0.8273143259415698, "grad_norm": 1.9260319471359253, "learning_rate": 1.0666914125511532e-06, "loss": 0.7021, "step": 11752 }, { "epoch": 0.827384723688842, "grad_norm": 1.9797090291976929, "learning_rate": 1.0658446555285148e-06, "loss": 0.6563, "step": 11753 }, { "epoch": 0.827455121436114, "grad_norm": 1.6092268228530884, "learning_rate": 1.0649982070279893e-06, "loss": 0.6078, "step": 11754 }, { "epoch": 0.8275255191833861, "grad_norm": 1.723626971244812, "learning_rate": 1.0641520670935798e-06, "loss": 0.6183, "step": 11755 }, { "epoch": 0.8275959169306583, "grad_norm": 2.1120383739471436, "learning_rate": 1.0633062357692841e-06, "loss": 0.6432, "step": 11756 }, { "epoch": 0.8276663146779303, "grad_norm": 2.013521432876587, "learning_rate": 1.0624607130990703e-06, "loss": 0.6718, "step": 11757 }, { "epoch": 0.8277367124252024, "grad_norm": 1.5783677101135254, "learning_rate": 1.0616154991269026e-06, "loss": 0.5631, "step": 11758 }, { "epoch": 0.8278071101724744, "grad_norm": 1.9372743368148804, "learning_rate": 1.060770593896722e-06, "loss": 0.725, "step": 11759 }, { "epoch": 0.8278775079197466, "grad_norm": 2.208268880844116, "learning_rate": 1.0599259974524585e-06, "loss": 0.6316, "step": 11760 }, { "epoch": 0.8279479056670187, "grad_norm": 1.8652933835983276, "learning_rate": 1.0590817098380211e-06, "loss": 0.6793, "step": 11761 }, { "epoch": 0.8280183034142907, "grad_norm": 2.043830156326294, "learning_rate": 1.0582377310973066e-06, "loss": 0.6455, "step": 11762 }, { "epoch": 0.8280887011615629, "grad_norm": 1.8313099145889282, "learning_rate": 1.0573940612741943e-06, "loss": 0.5468, "step": 11763 }, { "epoch": 0.8281590989088349, "grad_norm": 1.988839030265808, "learning_rate": 1.0565507004125466e-06, "loss": 0.6992, "step": 11764 }, { "epoch": 0.828229496656107, "grad_norm": 1.971311330795288, "learning_rate": 1.0557076485562094e-06, "loss": 0.6827, "step": 11765 }, { "epoch": 0.8282998944033791, "grad_norm": 3.297419309616089, "learning_rate": 1.0548649057490165e-06, "loss": 0.5619, "step": 11766 }, { "epoch": 0.8283702921506512, "grad_norm": 2.1379284858703613, "learning_rate": 1.0540224720347792e-06, "loss": 0.6019, "step": 11767 }, { "epoch": 0.8284406898979233, "grad_norm": 1.8178068399429321, "learning_rate": 1.0531803474573003e-06, "loss": 0.7161, "step": 11768 }, { "epoch": 0.8285110876451953, "grad_norm": 2.171966552734375, "learning_rate": 1.0523385320603611e-06, "loss": 0.6228, "step": 11769 }, { "epoch": 0.8285814853924675, "grad_norm": 2.1364872455596924, "learning_rate": 1.0514970258877288e-06, "loss": 0.6479, "step": 11770 }, { "epoch": 0.8286518831397395, "grad_norm": 1.7620666027069092, "learning_rate": 1.0506558289831507e-06, "loss": 0.6242, "step": 11771 }, { "epoch": 0.8287222808870116, "grad_norm": 1.8743064403533936, "learning_rate": 1.0498149413903661e-06, "loss": 0.593, "step": 11772 }, { "epoch": 0.8287926786342837, "grad_norm": 1.796466588973999, "learning_rate": 1.04897436315309e-06, "loss": 0.6317, "step": 11773 }, { "epoch": 0.8288630763815558, "grad_norm": 2.0288572311401367, "learning_rate": 1.0481340943150283e-06, "loss": 0.7151, "step": 11774 }, { "epoch": 0.8289334741288279, "grad_norm": 1.6945778131484985, "learning_rate": 1.0472941349198647e-06, "loss": 0.6804, "step": 11775 }, { "epoch": 0.8290038718760999, "grad_norm": 1.6837270259857178, "learning_rate": 1.0464544850112709e-06, "loss": 0.7238, "step": 11776 }, { "epoch": 0.829074269623372, "grad_norm": 1.6397615671157837, "learning_rate": 1.045615144632898e-06, "loss": 0.6793, "step": 11777 }, { "epoch": 0.8291446673706442, "grad_norm": 2.278855085372925, "learning_rate": 1.0447761138283883e-06, "loss": 0.5985, "step": 11778 }, { "epoch": 0.8292150651179162, "grad_norm": 1.9640207290649414, "learning_rate": 1.0439373926413591e-06, "loss": 0.606, "step": 11779 }, { "epoch": 0.8292854628651883, "grad_norm": 2.000046968460083, "learning_rate": 1.043098981115422e-06, "loss": 0.7004, "step": 11780 }, { "epoch": 0.8293558606124604, "grad_norm": 1.7933224439620972, "learning_rate": 1.0422608792941633e-06, "loss": 0.5961, "step": 11781 }, { "epoch": 0.8294262583597325, "grad_norm": 2.422968626022339, "learning_rate": 1.0414230872211568e-06, "loss": 0.7182, "step": 11782 }, { "epoch": 0.8294966561070046, "grad_norm": 1.8723886013031006, "learning_rate": 1.040585604939959e-06, "loss": 0.6106, "step": 11783 }, { "epoch": 0.8295670538542766, "grad_norm": 2.0195765495300293, "learning_rate": 1.0397484324941147e-06, "loss": 0.613, "step": 11784 }, { "epoch": 0.8296374516015488, "grad_norm": 1.9527889490127563, "learning_rate": 1.0389115699271452e-06, "loss": 0.5973, "step": 11785 }, { "epoch": 0.8297078493488208, "grad_norm": 2.0800251960754395, "learning_rate": 1.0380750172825638e-06, "loss": 0.629, "step": 11786 }, { "epoch": 0.8297782470960929, "grad_norm": 1.741429090499878, "learning_rate": 1.0372387746038617e-06, "loss": 0.6739, "step": 11787 }, { "epoch": 0.829848644843365, "grad_norm": 2.1203558444976807, "learning_rate": 1.036402841934516e-06, "loss": 0.679, "step": 11788 }, { "epoch": 0.8299190425906371, "grad_norm": 1.8024678230285645, "learning_rate": 1.035567219317986e-06, "loss": 0.6619, "step": 11789 }, { "epoch": 0.8299894403379092, "grad_norm": 1.9664345979690552, "learning_rate": 1.0347319067977193e-06, "loss": 0.7151, "step": 11790 }, { "epoch": 0.8300598380851812, "grad_norm": 1.872698426246643, "learning_rate": 1.0338969044171412e-06, "loss": 0.6098, "step": 11791 }, { "epoch": 0.8301302358324534, "grad_norm": 2.262057065963745, "learning_rate": 1.0330622122196685e-06, "loss": 0.6603, "step": 11792 }, { "epoch": 0.8302006335797254, "grad_norm": 1.8218258619308472, "learning_rate": 1.032227830248695e-06, "loss": 0.6972, "step": 11793 }, { "epoch": 0.8302710313269975, "grad_norm": 1.7197411060333252, "learning_rate": 1.0313937585476e-06, "loss": 0.681, "step": 11794 }, { "epoch": 0.8303414290742697, "grad_norm": 1.7676634788513184, "learning_rate": 1.0305599971597506e-06, "loss": 0.6611, "step": 11795 }, { "epoch": 0.8304118268215417, "grad_norm": 1.9484196901321411, "learning_rate": 1.0297265461284915e-06, "loss": 0.5752, "step": 11796 }, { "epoch": 0.8304822245688138, "grad_norm": 2.3353731632232666, "learning_rate": 1.0288934054971574e-06, "loss": 0.7153, "step": 11797 }, { "epoch": 0.8305526223160858, "grad_norm": 2.024710178375244, "learning_rate": 1.0280605753090635e-06, "loss": 0.7667, "step": 11798 }, { "epoch": 0.830623020063358, "grad_norm": 1.9246197938919067, "learning_rate": 1.0272280556075084e-06, "loss": 0.5551, "step": 11799 }, { "epoch": 0.8306934178106301, "grad_norm": 1.9521030187606812, "learning_rate": 1.0263958464357752e-06, "loss": 0.6599, "step": 11800 }, { "epoch": 0.8307638155579021, "grad_norm": 1.9869035482406616, "learning_rate": 1.0255639478371324e-06, "loss": 0.5819, "step": 11801 }, { "epoch": 0.8308342133051743, "grad_norm": 2.4981844425201416, "learning_rate": 1.02473235985483e-06, "loss": 0.6887, "step": 11802 }, { "epoch": 0.8309046110524463, "grad_norm": 2.032525062561035, "learning_rate": 1.0239010825321048e-06, "loss": 0.6251, "step": 11803 }, { "epoch": 0.8309750087997184, "grad_norm": 1.7464314699172974, "learning_rate": 1.0230701159121746e-06, "loss": 0.6545, "step": 11804 }, { "epoch": 0.8310454065469906, "grad_norm": 2.094531297683716, "learning_rate": 1.0222394600382422e-06, "loss": 0.7864, "step": 11805 }, { "epoch": 0.8311158042942626, "grad_norm": 1.8416595458984375, "learning_rate": 1.0214091149534928e-06, "loss": 0.6207, "step": 11806 }, { "epoch": 0.8311862020415347, "grad_norm": 4.786726474761963, "learning_rate": 1.0205790807010993e-06, "loss": 0.6756, "step": 11807 }, { "epoch": 0.8312565997888067, "grad_norm": 2.2290782928466797, "learning_rate": 1.019749357324213e-06, "loss": 0.7367, "step": 11808 }, { "epoch": 0.8313269975360789, "grad_norm": 2.9232380390167236, "learning_rate": 1.0189199448659758e-06, "loss": 0.7888, "step": 11809 }, { "epoch": 0.8313973952833509, "grad_norm": 2.0332534313201904, "learning_rate": 1.018090843369507e-06, "loss": 0.6862, "step": 11810 }, { "epoch": 0.831467793030623, "grad_norm": 1.982346773147583, "learning_rate": 1.017262052877913e-06, "loss": 0.6847, "step": 11811 }, { "epoch": 0.8315381907778951, "grad_norm": 2.0246989727020264, "learning_rate": 1.0164335734342815e-06, "loss": 0.7049, "step": 11812 }, { "epoch": 0.8316085885251672, "grad_norm": 2.117277145385742, "learning_rate": 1.0156054050816888e-06, "loss": 0.6308, "step": 11813 }, { "epoch": 0.8316789862724393, "grad_norm": 2.048142671585083, "learning_rate": 1.0147775478631892e-06, "loss": 0.6308, "step": 11814 }, { "epoch": 0.8317493840197113, "grad_norm": 2.7204959392547607, "learning_rate": 1.0139500018218275e-06, "loss": 0.6021, "step": 11815 }, { "epoch": 0.8318197817669835, "grad_norm": 2.555323362350464, "learning_rate": 1.0131227670006266e-06, "loss": 0.7267, "step": 11816 }, { "epoch": 0.8318901795142556, "grad_norm": 1.7068243026733398, "learning_rate": 1.0122958434425945e-06, "loss": 0.6303, "step": 11817 }, { "epoch": 0.8319605772615276, "grad_norm": 1.9000121355056763, "learning_rate": 1.0114692311907237e-06, "loss": 0.5921, "step": 11818 }, { "epoch": 0.8320309750087997, "grad_norm": 1.64280104637146, "learning_rate": 1.0106429302879912e-06, "loss": 0.6873, "step": 11819 }, { "epoch": 0.8321013727560718, "grad_norm": 1.9385122060775757, "learning_rate": 1.0098169407773563e-06, "loss": 0.7587, "step": 11820 }, { "epoch": 0.8321717705033439, "grad_norm": 1.9547218084335327, "learning_rate": 1.0089912627017652e-06, "loss": 0.7014, "step": 11821 }, { "epoch": 0.832242168250616, "grad_norm": 1.5750517845153809, "learning_rate": 1.0081658961041443e-06, "loss": 0.6676, "step": 11822 }, { "epoch": 0.832312565997888, "grad_norm": 1.7348181009292603, "learning_rate": 1.0073408410274048e-06, "loss": 0.7694, "step": 11823 }, { "epoch": 0.8323829637451602, "grad_norm": 1.8851971626281738, "learning_rate": 1.006516097514441e-06, "loss": 0.7424, "step": 11824 }, { "epoch": 0.8324533614924322, "grad_norm": 1.7109941244125366, "learning_rate": 1.0056916656081346e-06, "loss": 0.5753, "step": 11825 }, { "epoch": 0.8325237592397043, "grad_norm": 1.787218451499939, "learning_rate": 1.004867545351346e-06, "loss": 0.6279, "step": 11826 }, { "epoch": 0.8325941569869764, "grad_norm": 1.9828084707260132, "learning_rate": 1.004043736786924e-06, "loss": 0.5164, "step": 11827 }, { "epoch": 0.8326645547342485, "grad_norm": 2.0185019969940186, "learning_rate": 1.0032202399576989e-06, "loss": 0.6436, "step": 11828 }, { "epoch": 0.8327349524815206, "grad_norm": 2.0849947929382324, "learning_rate": 1.0023970549064845e-06, "loss": 0.692, "step": 11829 }, { "epoch": 0.8328053502287927, "grad_norm": 2.312873125076294, "learning_rate": 1.0015741816760773e-06, "loss": 0.7408, "step": 11830 }, { "epoch": 0.8328757479760648, "grad_norm": 1.7708053588867188, "learning_rate": 1.0007516203092626e-06, "loss": 0.6715, "step": 11831 }, { "epoch": 0.8329461457233368, "grad_norm": 2.857072114944458, "learning_rate": 9.999293708488028e-07, "loss": 0.6449, "step": 11832 }, { "epoch": 0.8330165434706089, "grad_norm": 1.6391522884368896, "learning_rate": 9.991074333374506e-07, "loss": 0.6691, "step": 11833 }, { "epoch": 0.8330869412178811, "grad_norm": 1.9650218486785889, "learning_rate": 9.982858078179375e-07, "loss": 0.6047, "step": 11834 }, { "epoch": 0.8331573389651531, "grad_norm": 2.042174816131592, "learning_rate": 9.974644943329813e-07, "loss": 0.6758, "step": 11835 }, { "epoch": 0.8332277367124252, "grad_norm": 3.3807153701782227, "learning_rate": 9.966434929252803e-07, "loss": 0.6424, "step": 11836 }, { "epoch": 0.8332981344596972, "grad_norm": 2.203474521636963, "learning_rate": 9.958228036375224e-07, "loss": 0.6838, "step": 11837 }, { "epoch": 0.8333685322069694, "grad_norm": 1.9949090480804443, "learning_rate": 9.950024265123733e-07, "loss": 0.6207, "step": 11838 }, { "epoch": 0.8334389299542415, "grad_norm": 1.9815353155136108, "learning_rate": 9.941823615924881e-07, "loss": 0.6932, "step": 11839 }, { "epoch": 0.8335093277015135, "grad_norm": 2.224470376968384, "learning_rate": 9.93362608920501e-07, "loss": 0.8333, "step": 11840 }, { "epoch": 0.8335797254487857, "grad_norm": 2.1539809703826904, "learning_rate": 9.925431685390314e-07, "loss": 0.66, "step": 11841 }, { "epoch": 0.8336501231960577, "grad_norm": 2.3457577228546143, "learning_rate": 9.91724040490682e-07, "loss": 0.7075, "step": 11842 }, { "epoch": 0.8337205209433298, "grad_norm": 1.8069336414337158, "learning_rate": 9.909052248180416e-07, "loss": 0.6574, "step": 11843 }, { "epoch": 0.8337909186906018, "grad_norm": 2.2363688945770264, "learning_rate": 9.90086721563682e-07, "loss": 0.6468, "step": 11844 }, { "epoch": 0.833861316437874, "grad_norm": 2.0500166416168213, "learning_rate": 9.89268530770155e-07, "loss": 0.5697, "step": 11845 }, { "epoch": 0.8339317141851461, "grad_norm": 2.2500364780426025, "learning_rate": 9.884506524800033e-07, "loss": 0.6877, "step": 11846 }, { "epoch": 0.8340021119324181, "grad_norm": 2.2594716548919678, "learning_rate": 9.876330867357437e-07, "loss": 0.6564, "step": 11847 }, { "epoch": 0.8340725096796903, "grad_norm": 2.0518603324890137, "learning_rate": 9.868158335798868e-07, "loss": 0.549, "step": 11848 }, { "epoch": 0.8341429074269623, "grad_norm": 1.9076703786849976, "learning_rate": 9.859988930549196e-07, "loss": 0.6138, "step": 11849 }, { "epoch": 0.8342133051742344, "grad_norm": 2.008492946624756, "learning_rate": 9.851822652033168e-07, "loss": 0.6026, "step": 11850 }, { "epoch": 0.8342837029215066, "grad_norm": 2.028149366378784, "learning_rate": 9.843659500675346e-07, "loss": 0.6527, "step": 11851 }, { "epoch": 0.8343541006687786, "grad_norm": 1.8890113830566406, "learning_rate": 9.835499476900178e-07, "loss": 0.4955, "step": 11852 }, { "epoch": 0.8344244984160507, "grad_norm": 3.130518674850464, "learning_rate": 9.827342581131855e-07, "loss": 0.6469, "step": 11853 }, { "epoch": 0.8344948961633227, "grad_norm": 2.2375717163085938, "learning_rate": 9.819188813794494e-07, "loss": 0.7268, "step": 11854 }, { "epoch": 0.8345652939105949, "grad_norm": 2.1378681659698486, "learning_rate": 9.811038175311991e-07, "loss": 0.5218, "step": 11855 }, { "epoch": 0.834635691657867, "grad_norm": 2.090581178665161, "learning_rate": 9.80289066610815e-07, "loss": 0.675, "step": 11856 }, { "epoch": 0.834706089405139, "grad_norm": 2.288585901260376, "learning_rate": 9.794746286606518e-07, "loss": 0.713, "step": 11857 }, { "epoch": 0.8347764871524112, "grad_norm": 1.8675429821014404, "learning_rate": 9.786605037230583e-07, "loss": 0.6176, "step": 11858 }, { "epoch": 0.8348468848996832, "grad_norm": 1.858512043952942, "learning_rate": 9.778466918403563e-07, "loss": 0.6777, "step": 11859 }, { "epoch": 0.8349172826469553, "grad_norm": 2.6141397953033447, "learning_rate": 9.770331930548593e-07, "loss": 0.563, "step": 11860 }, { "epoch": 0.8349876803942274, "grad_norm": 1.7100967168807983, "learning_rate": 9.762200074088595e-07, "loss": 0.5469, "step": 11861 }, { "epoch": 0.8350580781414995, "grad_norm": 1.7507002353668213, "learning_rate": 9.754071349446398e-07, "loss": 0.6465, "step": 11862 }, { "epoch": 0.8351284758887716, "grad_norm": 2.3309056758880615, "learning_rate": 9.74594575704457e-07, "loss": 0.7548, "step": 11863 }, { "epoch": 0.8351988736360436, "grad_norm": 3.2890467643737793, "learning_rate": 9.737823297305622e-07, "loss": 0.5869, "step": 11864 }, { "epoch": 0.8352692713833157, "grad_norm": 1.9718685150146484, "learning_rate": 9.72970397065178e-07, "loss": 0.5789, "step": 11865 }, { "epoch": 0.8353396691305878, "grad_norm": 2.0817525386810303, "learning_rate": 9.721587777505238e-07, "loss": 0.6261, "step": 11866 }, { "epoch": 0.8354100668778599, "grad_norm": 1.9797958135604858, "learning_rate": 9.71347471828792e-07, "loss": 0.6796, "step": 11867 }, { "epoch": 0.835480464625132, "grad_norm": 5.007521629333496, "learning_rate": 9.705364793421665e-07, "loss": 0.7907, "step": 11868 }, { "epoch": 0.8355508623724041, "grad_norm": 2.1641643047332764, "learning_rate": 9.69725800332809e-07, "loss": 0.7336, "step": 11869 }, { "epoch": 0.8356212601196762, "grad_norm": 1.7456432580947876, "learning_rate": 9.689154348428694e-07, "loss": 0.5003, "step": 11870 }, { "epoch": 0.8356916578669482, "grad_norm": 1.9123005867004395, "learning_rate": 9.681053829144785e-07, "loss": 0.5893, "step": 11871 }, { "epoch": 0.8357620556142203, "grad_norm": 1.9720799922943115, "learning_rate": 9.672956445897523e-07, "loss": 0.7124, "step": 11872 }, { "epoch": 0.8358324533614925, "grad_norm": 1.8450602293014526, "learning_rate": 9.664862199107862e-07, "loss": 0.5762, "step": 11873 }, { "epoch": 0.8359028511087645, "grad_norm": 2.293516159057617, "learning_rate": 9.656771089196685e-07, "loss": 0.6909, "step": 11874 }, { "epoch": 0.8359732488560366, "grad_norm": 1.987818956375122, "learning_rate": 9.648683116584605e-07, "loss": 0.6315, "step": 11875 }, { "epoch": 0.8360436466033087, "grad_norm": 1.9014067649841309, "learning_rate": 9.640598281692173e-07, "loss": 0.6329, "step": 11876 }, { "epoch": 0.8361140443505808, "grad_norm": 2.312798261642456, "learning_rate": 9.63251658493969e-07, "loss": 0.6112, "step": 11877 }, { "epoch": 0.8361844420978529, "grad_norm": 2.158432960510254, "learning_rate": 9.624438026747348e-07, "loss": 0.5675, "step": 11878 }, { "epoch": 0.8362548398451249, "grad_norm": 1.899212121963501, "learning_rate": 9.61636260753514e-07, "loss": 0.594, "step": 11879 }, { "epoch": 0.8363252375923971, "grad_norm": 1.9241764545440674, "learning_rate": 9.608290327722938e-07, "loss": 0.7018, "step": 11880 }, { "epoch": 0.8363956353396691, "grad_norm": 1.742537498474121, "learning_rate": 9.600221187730399e-07, "loss": 0.6497, "step": 11881 }, { "epoch": 0.8364660330869412, "grad_norm": 2.4315872192382812, "learning_rate": 9.592155187977077e-07, "loss": 0.7088, "step": 11882 }, { "epoch": 0.8365364308342133, "grad_norm": 1.9719769954681396, "learning_rate": 9.584092328882314e-07, "loss": 0.6495, "step": 11883 }, { "epoch": 0.8366068285814854, "grad_norm": 2.225410223007202, "learning_rate": 9.576032610865312e-07, "loss": 0.6399, "step": 11884 }, { "epoch": 0.8366772263287575, "grad_norm": 2.4155564308166504, "learning_rate": 9.567976034345076e-07, "loss": 0.729, "step": 11885 }, { "epoch": 0.8367476240760295, "grad_norm": 2.2682721614837646, "learning_rate": 9.559922599740517e-07, "loss": 0.6942, "step": 11886 }, { "epoch": 0.8368180218233017, "grad_norm": 2.2352585792541504, "learning_rate": 9.551872307470304e-07, "loss": 0.6176, "step": 11887 }, { "epoch": 0.8368884195705737, "grad_norm": 2.0744550228118896, "learning_rate": 9.543825157953014e-07, "loss": 0.6062, "step": 11888 }, { "epoch": 0.8369588173178458, "grad_norm": 1.937880039215088, "learning_rate": 9.535781151606996e-07, "loss": 0.5982, "step": 11889 }, { "epoch": 0.837029215065118, "grad_norm": 2.2099926471710205, "learning_rate": 9.527740288850486e-07, "loss": 0.7, "step": 11890 }, { "epoch": 0.83709961281239, "grad_norm": 2.199674129486084, "learning_rate": 9.5197025701015e-07, "loss": 0.7058, "step": 11891 }, { "epoch": 0.8371700105596621, "grad_norm": 1.7452260255813599, "learning_rate": 9.511667995777954e-07, "loss": 0.5666, "step": 11892 }, { "epoch": 0.8372404083069341, "grad_norm": 1.7155158519744873, "learning_rate": 9.503636566297594e-07, "loss": 0.6734, "step": 11893 }, { "epoch": 0.8373108060542063, "grad_norm": 2.0733320713043213, "learning_rate": 9.495608282077953e-07, "loss": 0.6101, "step": 11894 }, { "epoch": 0.8373812038014784, "grad_norm": 1.95787513256073, "learning_rate": 9.487583143536426e-07, "loss": 0.6042, "step": 11895 }, { "epoch": 0.8374516015487504, "grad_norm": 2.1931326389312744, "learning_rate": 9.479561151090247e-07, "loss": 0.6947, "step": 11896 }, { "epoch": 0.8375219992960226, "grad_norm": 2.832143783569336, "learning_rate": 9.471542305156513e-07, "loss": 0.7827, "step": 11897 }, { "epoch": 0.8375923970432946, "grad_norm": 1.7975903749465942, "learning_rate": 9.463526606152092e-07, "loss": 0.5243, "step": 11898 }, { "epoch": 0.8376627947905667, "grad_norm": 2.002018928527832, "learning_rate": 9.455514054493764e-07, "loss": 0.7445, "step": 11899 }, { "epoch": 0.8377331925378388, "grad_norm": 2.0564229488372803, "learning_rate": 9.447504650598095e-07, "loss": 0.7574, "step": 11900 }, { "epoch": 0.8378035902851109, "grad_norm": 2.0475523471832275, "learning_rate": 9.439498394881489e-07, "loss": 0.6399, "step": 11901 }, { "epoch": 0.837873988032383, "grad_norm": 1.8461024761199951, "learning_rate": 9.431495287760201e-07, "loss": 0.7049, "step": 11902 }, { "epoch": 0.837944385779655, "grad_norm": 2.1855337619781494, "learning_rate": 9.423495329650346e-07, "loss": 0.5842, "step": 11903 }, { "epoch": 0.8380147835269272, "grad_norm": 2.035095453262329, "learning_rate": 9.415498520967806e-07, "loss": 0.6343, "step": 11904 }, { "epoch": 0.8380851812741992, "grad_norm": 1.8983060121536255, "learning_rate": 9.407504862128388e-07, "loss": 0.66, "step": 11905 }, { "epoch": 0.8381555790214713, "grad_norm": 1.923671007156372, "learning_rate": 9.399514353547664e-07, "loss": 0.6765, "step": 11906 }, { "epoch": 0.8382259767687434, "grad_norm": 1.852258324623108, "learning_rate": 9.391526995641074e-07, "loss": 0.5492, "step": 11907 }, { "epoch": 0.8382963745160155, "grad_norm": 1.6002683639526367, "learning_rate": 9.383542788823867e-07, "loss": 0.6223, "step": 11908 }, { "epoch": 0.8383667722632876, "grad_norm": 1.7320125102996826, "learning_rate": 9.375561733511191e-07, "loss": 0.6808, "step": 11909 }, { "epoch": 0.8384371700105596, "grad_norm": 1.715061902999878, "learning_rate": 9.367583830117947e-07, "loss": 0.4669, "step": 11910 }, { "epoch": 0.8385075677578318, "grad_norm": 1.9795764684677124, "learning_rate": 9.359609079058942e-07, "loss": 0.6041, "step": 11911 }, { "epoch": 0.8385779655051039, "grad_norm": 1.9655967950820923, "learning_rate": 9.351637480748782e-07, "loss": 0.6444, "step": 11912 }, { "epoch": 0.8386483632523759, "grad_norm": 3.498270273208618, "learning_rate": 9.343669035601916e-07, "loss": 0.6078, "step": 11913 }, { "epoch": 0.838718760999648, "grad_norm": 1.907637596130371, "learning_rate": 9.335703744032622e-07, "loss": 0.7282, "step": 11914 }, { "epoch": 0.8387891587469201, "grad_norm": 2.3639867305755615, "learning_rate": 9.327741606455039e-07, "loss": 0.5893, "step": 11915 }, { "epoch": 0.8388595564941922, "grad_norm": 2.0383708477020264, "learning_rate": 9.319782623283103e-07, "loss": 0.63, "step": 11916 }, { "epoch": 0.8389299542414643, "grad_norm": 2.8173999786376953, "learning_rate": 9.311826794930636e-07, "loss": 0.6538, "step": 11917 }, { "epoch": 0.8390003519887363, "grad_norm": 1.929152011871338, "learning_rate": 9.303874121811263e-07, "loss": 0.5697, "step": 11918 }, { "epoch": 0.8390707497360085, "grad_norm": 1.9143911600112915, "learning_rate": 9.295924604338446e-07, "loss": 0.739, "step": 11919 }, { "epoch": 0.8391411474832805, "grad_norm": 2.237581253051758, "learning_rate": 9.287978242925461e-07, "loss": 0.6714, "step": 11920 }, { "epoch": 0.8392115452305526, "grad_norm": 1.9224939346313477, "learning_rate": 9.280035037985492e-07, "loss": 0.6605, "step": 11921 }, { "epoch": 0.8392819429778247, "grad_norm": 2.0145907402038574, "learning_rate": 9.272094989931479e-07, "loss": 0.6354, "step": 11922 }, { "epoch": 0.8393523407250968, "grad_norm": 1.6656808853149414, "learning_rate": 9.264158099176256e-07, "loss": 0.6666, "step": 11923 }, { "epoch": 0.8394227384723689, "grad_norm": 2.3115999698638916, "learning_rate": 9.256224366132463e-07, "loss": 0.7159, "step": 11924 }, { "epoch": 0.839493136219641, "grad_norm": 1.940340280532837, "learning_rate": 9.248293791212581e-07, "loss": 0.6564, "step": 11925 }, { "epoch": 0.8395635339669131, "grad_norm": 2.0944511890411377, "learning_rate": 9.240366374828902e-07, "loss": 0.5968, "step": 11926 }, { "epoch": 0.8396339317141851, "grad_norm": 2.3638877868652344, "learning_rate": 9.232442117393628e-07, "loss": 0.6211, "step": 11927 }, { "epoch": 0.8397043294614572, "grad_norm": 2.098498821258545, "learning_rate": 9.224521019318698e-07, "loss": 0.7039, "step": 11928 }, { "epoch": 0.8397747272087294, "grad_norm": 1.9541692733764648, "learning_rate": 9.216603081015981e-07, "loss": 0.5384, "step": 11929 }, { "epoch": 0.8398451249560014, "grad_norm": 2.31354022026062, "learning_rate": 9.208688302897116e-07, "loss": 0.6635, "step": 11930 }, { "epoch": 0.8399155227032735, "grad_norm": 2.02278208732605, "learning_rate": 9.200776685373604e-07, "loss": 0.7346, "step": 11931 }, { "epoch": 0.8399859204505455, "grad_norm": 1.850508451461792, "learning_rate": 9.192868228856756e-07, "loss": 0.7429, "step": 11932 }, { "epoch": 0.8400563181978177, "grad_norm": 1.8289672136306763, "learning_rate": 9.184962933757782e-07, "loss": 0.718, "step": 11933 }, { "epoch": 0.8401267159450898, "grad_norm": 1.66080641746521, "learning_rate": 9.177060800487641e-07, "loss": 0.5948, "step": 11934 }, { "epoch": 0.8401971136923618, "grad_norm": 1.4064574241638184, "learning_rate": 9.169161829457208e-07, "loss": 0.6499, "step": 11935 }, { "epoch": 0.840267511439634, "grad_norm": 3.1613857746124268, "learning_rate": 9.161266021077147e-07, "loss": 0.6006, "step": 11936 }, { "epoch": 0.840337909186906, "grad_norm": 2.0942885875701904, "learning_rate": 9.15337337575796e-07, "loss": 0.7392, "step": 11937 }, { "epoch": 0.8404083069341781, "grad_norm": 1.7091478109359741, "learning_rate": 9.145483893909986e-07, "loss": 0.64, "step": 11938 }, { "epoch": 0.8404787046814501, "grad_norm": 2.630505323410034, "learning_rate": 9.137597575943432e-07, "loss": 0.7203, "step": 11939 }, { "epoch": 0.8405491024287223, "grad_norm": 1.6979405879974365, "learning_rate": 9.129714422268286e-07, "loss": 0.6369, "step": 11940 }, { "epoch": 0.8406195001759944, "grad_norm": 1.7149690389633179, "learning_rate": 9.121834433294427e-07, "loss": 0.6913, "step": 11941 }, { "epoch": 0.8406898979232664, "grad_norm": 1.9496560096740723, "learning_rate": 9.113957609431533e-07, "loss": 0.7427, "step": 11942 }, { "epoch": 0.8407602956705386, "grad_norm": 2.172482967376709, "learning_rate": 9.106083951089104e-07, "loss": 0.7397, "step": 11943 }, { "epoch": 0.8408306934178106, "grad_norm": 1.9663090705871582, "learning_rate": 9.098213458676538e-07, "loss": 0.6799, "step": 11944 }, { "epoch": 0.8409010911650827, "grad_norm": 1.9403783082962036, "learning_rate": 9.090346132603e-07, "loss": 0.6506, "step": 11945 }, { "epoch": 0.8409714889123548, "grad_norm": 1.852002739906311, "learning_rate": 9.082481973277538e-07, "loss": 0.5761, "step": 11946 }, { "epoch": 0.8410418866596269, "grad_norm": 1.637376308441162, "learning_rate": 9.074620981108997e-07, "loss": 0.616, "step": 11947 }, { "epoch": 0.841112284406899, "grad_norm": 1.704947829246521, "learning_rate": 9.066763156506117e-07, "loss": 0.6789, "step": 11948 }, { "epoch": 0.841182682154171, "grad_norm": 2.1198372840881348, "learning_rate": 9.058908499877369e-07, "loss": 0.6886, "step": 11949 }, { "epoch": 0.8412530799014432, "grad_norm": 1.7787586450576782, "learning_rate": 9.051057011631182e-07, "loss": 0.6649, "step": 11950 }, { "epoch": 0.8413234776487153, "grad_norm": 2.1381659507751465, "learning_rate": 9.043208692175723e-07, "loss": 0.6121, "step": 11951 }, { "epoch": 0.8413938753959873, "grad_norm": 2.292370319366455, "learning_rate": 9.035363541919065e-07, "loss": 0.6161, "step": 11952 }, { "epoch": 0.8414642731432594, "grad_norm": 2.183227062225342, "learning_rate": 9.027521561269056e-07, "loss": 0.6963, "step": 11953 }, { "epoch": 0.8415346708905315, "grad_norm": 2.0537126064300537, "learning_rate": 9.019682750633448e-07, "loss": 0.7325, "step": 11954 }, { "epoch": 0.8416050686378036, "grad_norm": 2.1232261657714844, "learning_rate": 9.011847110419736e-07, "loss": 0.7432, "step": 11955 }, { "epoch": 0.8416754663850757, "grad_norm": 2.1536478996276855, "learning_rate": 9.004014641035335e-07, "loss": 0.6747, "step": 11956 }, { "epoch": 0.8417458641323478, "grad_norm": 1.718031406402588, "learning_rate": 8.996185342887444e-07, "loss": 0.6474, "step": 11957 }, { "epoch": 0.8418162618796199, "grad_norm": 2.2083163261413574, "learning_rate": 8.988359216383137e-07, "loss": 0.6554, "step": 11958 }, { "epoch": 0.8418866596268919, "grad_norm": 2.100618839263916, "learning_rate": 8.980536261929272e-07, "loss": 0.6027, "step": 11959 }, { "epoch": 0.841957057374164, "grad_norm": 2.0426692962646484, "learning_rate": 8.972716479932626e-07, "loss": 0.6667, "step": 11960 }, { "epoch": 0.8420274551214361, "grad_norm": 2.988525390625, "learning_rate": 8.964899870799685e-07, "loss": 0.719, "step": 11961 }, { "epoch": 0.8420978528687082, "grad_norm": 1.870186448097229, "learning_rate": 8.957086434936893e-07, "loss": 0.5827, "step": 11962 }, { "epoch": 0.8421682506159803, "grad_norm": 2.2353708744049072, "learning_rate": 8.949276172750445e-07, "loss": 0.7437, "step": 11963 }, { "epoch": 0.8422386483632524, "grad_norm": 1.922163724899292, "learning_rate": 8.941469084646438e-07, "loss": 0.7653, "step": 11964 }, { "epoch": 0.8423090461105245, "grad_norm": 1.859761357307434, "learning_rate": 8.933665171030733e-07, "loss": 0.6517, "step": 11965 }, { "epoch": 0.8423794438577965, "grad_norm": 1.7190653085708618, "learning_rate": 8.925864432309115e-07, "loss": 0.5997, "step": 11966 }, { "epoch": 0.8424498416050686, "grad_norm": 2.524077892303467, "learning_rate": 8.91806686888708e-07, "loss": 0.5992, "step": 11967 }, { "epoch": 0.8425202393523408, "grad_norm": 1.5954021215438843, "learning_rate": 8.910272481170086e-07, "loss": 0.576, "step": 11968 }, { "epoch": 0.8425906370996128, "grad_norm": 1.880162239074707, "learning_rate": 8.902481269563342e-07, "loss": 0.6651, "step": 11969 }, { "epoch": 0.8426610348468849, "grad_norm": 1.783121109008789, "learning_rate": 8.894693234471934e-07, "loss": 0.5986, "step": 11970 }, { "epoch": 0.842731432594157, "grad_norm": 2.119314432144165, "learning_rate": 8.886908376300759e-07, "loss": 0.6378, "step": 11971 }, { "epoch": 0.8428018303414291, "grad_norm": 2.0516304969787598, "learning_rate": 8.879126695454577e-07, "loss": 0.611, "step": 11972 }, { "epoch": 0.8428722280887012, "grad_norm": 2.368353843688965, "learning_rate": 8.87134819233795e-07, "loss": 0.5971, "step": 11973 }, { "epoch": 0.8429426258359732, "grad_norm": 2.2456343173980713, "learning_rate": 8.863572867355296e-07, "loss": 0.66, "step": 11974 }, { "epoch": 0.8430130235832454, "grad_norm": 1.9594826698303223, "learning_rate": 8.855800720910845e-07, "loss": 0.6252, "step": 11975 }, { "epoch": 0.8430834213305174, "grad_norm": 1.8720499277114868, "learning_rate": 8.848031753408694e-07, "loss": 0.612, "step": 11976 }, { "epoch": 0.8431538190777895, "grad_norm": 2.015707492828369, "learning_rate": 8.840265965252747e-07, "loss": 0.6385, "step": 11977 }, { "epoch": 0.8432242168250615, "grad_norm": 2.0578813552856445, "learning_rate": 8.832503356846776e-07, "loss": 0.6957, "step": 11978 }, { "epoch": 0.8432946145723337, "grad_norm": 2.2612390518188477, "learning_rate": 8.82474392859435e-07, "loss": 0.6667, "step": 11979 }, { "epoch": 0.8433650123196058, "grad_norm": 1.797975778579712, "learning_rate": 8.816987680898897e-07, "loss": 0.6257, "step": 11980 }, { "epoch": 0.8434354100668778, "grad_norm": 1.904595136642456, "learning_rate": 8.80923461416364e-07, "loss": 0.6578, "step": 11981 }, { "epoch": 0.84350580781415, "grad_norm": 1.6964287757873535, "learning_rate": 8.801484728791718e-07, "loss": 0.701, "step": 11982 }, { "epoch": 0.843576205561422, "grad_norm": 1.767348289489746, "learning_rate": 8.79373802518601e-07, "loss": 0.5963, "step": 11983 }, { "epoch": 0.8436466033086941, "grad_norm": 2.138378858566284, "learning_rate": 8.785994503749309e-07, "loss": 0.7245, "step": 11984 }, { "epoch": 0.8437170010559663, "grad_norm": 1.9704139232635498, "learning_rate": 8.778254164884196e-07, "loss": 0.6554, "step": 11985 }, { "epoch": 0.8437873988032383, "grad_norm": 2.0024943351745605, "learning_rate": 8.77051700899309e-07, "loss": 0.7251, "step": 11986 }, { "epoch": 0.8438577965505104, "grad_norm": 2.019590139389038, "learning_rate": 8.762783036478241e-07, "loss": 0.7094, "step": 11987 }, { "epoch": 0.8439281942977824, "grad_norm": 1.8085826635360718, "learning_rate": 8.755052247741779e-07, "loss": 0.642, "step": 11988 }, { "epoch": 0.8439985920450546, "grad_norm": 2.0684685707092285, "learning_rate": 8.747324643185603e-07, "loss": 0.5302, "step": 11989 }, { "epoch": 0.8440689897923267, "grad_norm": 2.038626194000244, "learning_rate": 8.739600223211504e-07, "loss": 0.7186, "step": 11990 }, { "epoch": 0.8441393875395987, "grad_norm": 1.8479328155517578, "learning_rate": 8.731878988221074e-07, "loss": 0.7355, "step": 11991 }, { "epoch": 0.8442097852868709, "grad_norm": 1.8110523223876953, "learning_rate": 8.724160938615741e-07, "loss": 0.6209, "step": 11992 }, { "epoch": 0.8442801830341429, "grad_norm": 2.706589698791504, "learning_rate": 8.716446074796765e-07, "loss": 0.87, "step": 11993 }, { "epoch": 0.844350580781415, "grad_norm": 1.759390115737915, "learning_rate": 8.708734397165252e-07, "loss": 0.6568, "step": 11994 }, { "epoch": 0.844420978528687, "grad_norm": 1.9835692644119263, "learning_rate": 8.701025906122167e-07, "loss": 0.6505, "step": 11995 }, { "epoch": 0.8444913762759592, "grad_norm": 2.252683639526367, "learning_rate": 8.69332060206826e-07, "loss": 0.6635, "step": 11996 }, { "epoch": 0.8445617740232313, "grad_norm": 1.8339051008224487, "learning_rate": 8.685618485404137e-07, "loss": 0.6008, "step": 11997 }, { "epoch": 0.8446321717705033, "grad_norm": 2.128572463989258, "learning_rate": 8.677919556530223e-07, "loss": 0.5697, "step": 11998 }, { "epoch": 0.8447025695177754, "grad_norm": 1.7478877305984497, "learning_rate": 8.670223815846821e-07, "loss": 0.5678, "step": 11999 }, { "epoch": 0.8447729672650475, "grad_norm": 1.7025572061538696, "learning_rate": 8.662531263754011e-07, "loss": 0.5468, "step": 12000 }, { "epoch": 0.8448433650123196, "grad_norm": 1.863908052444458, "learning_rate": 8.654841900651773e-07, "loss": 0.6418, "step": 12001 }, { "epoch": 0.8449137627595917, "grad_norm": 1.9756085872650146, "learning_rate": 8.647155726939852e-07, "loss": 0.6023, "step": 12002 }, { "epoch": 0.8449841605068638, "grad_norm": 1.9528632164001465, "learning_rate": 8.639472743017878e-07, "loss": 0.7549, "step": 12003 }, { "epoch": 0.8450545582541359, "grad_norm": 2.0850720405578613, "learning_rate": 8.631792949285271e-07, "loss": 0.6573, "step": 12004 }, { "epoch": 0.8451249560014079, "grad_norm": 2.0792953968048096, "learning_rate": 8.62411634614134e-07, "loss": 0.6495, "step": 12005 }, { "epoch": 0.84519535374868, "grad_norm": 1.9516854286193848, "learning_rate": 8.61644293398517e-07, "loss": 0.5947, "step": 12006 }, { "epoch": 0.8452657514959522, "grad_norm": 2.0438404083251953, "learning_rate": 8.608772713215739e-07, "loss": 0.6897, "step": 12007 }, { "epoch": 0.8453361492432242, "grad_norm": 2.1773784160614014, "learning_rate": 8.601105684231812e-07, "loss": 0.4839, "step": 12008 }, { "epoch": 0.8454065469904963, "grad_norm": 2.00911283493042, "learning_rate": 8.59344184743201e-07, "loss": 0.5644, "step": 12009 }, { "epoch": 0.8454769447377684, "grad_norm": 2.045351982116699, "learning_rate": 8.585781203214763e-07, "loss": 0.7135, "step": 12010 }, { "epoch": 0.8455473424850405, "grad_norm": 1.7235389947891235, "learning_rate": 8.578123751978388e-07, "loss": 0.684, "step": 12011 }, { "epoch": 0.8456177402323126, "grad_norm": 6.825764179229736, "learning_rate": 8.570469494120968e-07, "loss": 0.6952, "step": 12012 }, { "epoch": 0.8456881379795846, "grad_norm": 1.5847210884094238, "learning_rate": 8.562818430040492e-07, "loss": 0.5383, "step": 12013 }, { "epoch": 0.8457585357268568, "grad_norm": 1.9202572107315063, "learning_rate": 8.555170560134724e-07, "loss": 0.6171, "step": 12014 }, { "epoch": 0.8458289334741288, "grad_norm": 2.20739483833313, "learning_rate": 8.54752588480128e-07, "loss": 0.6361, "step": 12015 }, { "epoch": 0.8458993312214009, "grad_norm": 2.2145626544952393, "learning_rate": 8.539884404437614e-07, "loss": 0.5276, "step": 12016 }, { "epoch": 0.845969728968673, "grad_norm": 1.6211379766464233, "learning_rate": 8.532246119441031e-07, "loss": 0.6138, "step": 12017 }, { "epoch": 0.8460401267159451, "grad_norm": 2.0243048667907715, "learning_rate": 8.524611030208628e-07, "loss": 0.6431, "step": 12018 }, { "epoch": 0.8461105244632172, "grad_norm": 2.0953104496002197, "learning_rate": 8.516979137137381e-07, "loss": 0.6203, "step": 12019 }, { "epoch": 0.8461809222104892, "grad_norm": 2.0164268016815186, "learning_rate": 8.509350440624078e-07, "loss": 0.7068, "step": 12020 }, { "epoch": 0.8462513199577614, "grad_norm": 2.0234463214874268, "learning_rate": 8.501724941065331e-07, "loss": 0.6729, "step": 12021 }, { "epoch": 0.8463217177050334, "grad_norm": 2.4009599685668945, "learning_rate": 8.494102638857592e-07, "loss": 0.5928, "step": 12022 }, { "epoch": 0.8463921154523055, "grad_norm": 1.9578211307525635, "learning_rate": 8.486483534397171e-07, "loss": 0.5652, "step": 12023 }, { "epoch": 0.8464625131995777, "grad_norm": 1.5387351512908936, "learning_rate": 8.478867628080161e-07, "loss": 0.5658, "step": 12024 }, { "epoch": 0.8465329109468497, "grad_norm": 2.8826587200164795, "learning_rate": 8.471254920302562e-07, "loss": 0.6943, "step": 12025 }, { "epoch": 0.8466033086941218, "grad_norm": 1.8337199687957764, "learning_rate": 8.46364541146014e-07, "loss": 0.5756, "step": 12026 }, { "epoch": 0.8466737064413938, "grad_norm": 1.889025330543518, "learning_rate": 8.456039101948525e-07, "loss": 0.7298, "step": 12027 }, { "epoch": 0.846744104188666, "grad_norm": 2.107468366622925, "learning_rate": 8.448435992163161e-07, "loss": 0.6599, "step": 12028 }, { "epoch": 0.8468145019359381, "grad_norm": 1.7836085557937622, "learning_rate": 8.440836082499367e-07, "loss": 0.7036, "step": 12029 }, { "epoch": 0.8468848996832101, "grad_norm": 2.1276419162750244, "learning_rate": 8.433239373352239e-07, "loss": 0.5514, "step": 12030 }, { "epoch": 0.8469552974304823, "grad_norm": 1.9467180967330933, "learning_rate": 8.425645865116769e-07, "loss": 0.7143, "step": 12031 }, { "epoch": 0.8470256951777543, "grad_norm": 2.056413412094116, "learning_rate": 8.418055558187742e-07, "loss": 0.6569, "step": 12032 }, { "epoch": 0.8470960929250264, "grad_norm": 1.853884220123291, "learning_rate": 8.410468452959769e-07, "loss": 0.72, "step": 12033 }, { "epoch": 0.8471664906722984, "grad_norm": 1.782046914100647, "learning_rate": 8.402884549827311e-07, "loss": 0.6393, "step": 12034 }, { "epoch": 0.8472368884195706, "grad_norm": 1.9772406816482544, "learning_rate": 8.395303849184687e-07, "loss": 0.7033, "step": 12035 }, { "epoch": 0.8473072861668427, "grad_norm": 2.4502296447753906, "learning_rate": 8.387726351425995e-07, "loss": 0.7204, "step": 12036 }, { "epoch": 0.8473776839141147, "grad_norm": 2.200040102005005, "learning_rate": 8.380152056945214e-07, "loss": 0.7528, "step": 12037 }, { "epoch": 0.8474480816613869, "grad_norm": 1.8245927095413208, "learning_rate": 8.372580966136145e-07, "loss": 0.7106, "step": 12038 }, { "epoch": 0.8475184794086589, "grad_norm": 1.9589587450027466, "learning_rate": 8.3650130793924e-07, "loss": 0.5593, "step": 12039 }, { "epoch": 0.847588877155931, "grad_norm": 2.1325252056121826, "learning_rate": 8.357448397107431e-07, "loss": 0.6776, "step": 12040 }, { "epoch": 0.8476592749032031, "grad_norm": 1.7983934879302979, "learning_rate": 8.349886919674558e-07, "loss": 0.6617, "step": 12041 }, { "epoch": 0.8477296726504752, "grad_norm": 1.9579949378967285, "learning_rate": 8.342328647486888e-07, "loss": 0.6121, "step": 12042 }, { "epoch": 0.8478000703977473, "grad_norm": 3.013944387435913, "learning_rate": 8.334773580937407e-07, "loss": 0.6077, "step": 12043 }, { "epoch": 0.8478704681450193, "grad_norm": 2.198981761932373, "learning_rate": 8.327221720418888e-07, "loss": 0.6206, "step": 12044 }, { "epoch": 0.8479408658922915, "grad_norm": 1.9666117429733276, "learning_rate": 8.319673066323959e-07, "loss": 0.6497, "step": 12045 }, { "epoch": 0.8480112636395636, "grad_norm": 2.183073043823242, "learning_rate": 8.3121276190451e-07, "loss": 0.656, "step": 12046 }, { "epoch": 0.8480816613868356, "grad_norm": 1.9353358745574951, "learning_rate": 8.30458537897458e-07, "loss": 0.7653, "step": 12047 }, { "epoch": 0.8481520591341077, "grad_norm": 1.8877111673355103, "learning_rate": 8.297046346504551e-07, "loss": 0.6841, "step": 12048 }, { "epoch": 0.8482224568813798, "grad_norm": 2.5490939617156982, "learning_rate": 8.289510522026949e-07, "loss": 0.6385, "step": 12049 }, { "epoch": 0.8482928546286519, "grad_norm": 1.8302912712097168, "learning_rate": 8.281977905933617e-07, "loss": 0.4988, "step": 12050 }, { "epoch": 0.8483632523759239, "grad_norm": 2.543398141860962, "learning_rate": 8.274448498616125e-07, "loss": 0.6106, "step": 12051 }, { "epoch": 0.848433650123196, "grad_norm": 2.00384259223938, "learning_rate": 8.266922300465964e-07, "loss": 0.5193, "step": 12052 }, { "epoch": 0.8485040478704682, "grad_norm": 2.1824889183044434, "learning_rate": 8.25939931187441e-07, "loss": 0.6326, "step": 12053 }, { "epoch": 0.8485744456177402, "grad_norm": 1.7346351146697998, "learning_rate": 8.251879533232613e-07, "loss": 0.6443, "step": 12054 }, { "epoch": 0.8486448433650123, "grad_norm": 2.1501874923706055, "learning_rate": 8.244362964931507e-07, "loss": 0.5437, "step": 12055 }, { "epoch": 0.8487152411122844, "grad_norm": 2.4734530448913574, "learning_rate": 8.236849607361929e-07, "loss": 0.7154, "step": 12056 }, { "epoch": 0.8487856388595565, "grad_norm": 2.052004337310791, "learning_rate": 8.229339460914448e-07, "loss": 0.5953, "step": 12057 }, { "epoch": 0.8488560366068286, "grad_norm": 1.9413179159164429, "learning_rate": 8.221832525979563e-07, "loss": 0.634, "step": 12058 }, { "epoch": 0.8489264343541006, "grad_norm": 2.039217948913574, "learning_rate": 8.214328802947545e-07, "loss": 0.7399, "step": 12059 }, { "epoch": 0.8489968321013728, "grad_norm": 2.323258399963379, "learning_rate": 8.206828292208541e-07, "loss": 0.7823, "step": 12060 }, { "epoch": 0.8490672298486448, "grad_norm": 2.1827962398529053, "learning_rate": 8.199330994152482e-07, "loss": 0.7164, "step": 12061 }, { "epoch": 0.8491376275959169, "grad_norm": 2.299327850341797, "learning_rate": 8.191836909169206e-07, "loss": 0.7477, "step": 12062 }, { "epoch": 0.8492080253431891, "grad_norm": 1.788019061088562, "learning_rate": 8.184346037648278e-07, "loss": 0.6797, "step": 12063 }, { "epoch": 0.8492784230904611, "grad_norm": 1.9028282165527344, "learning_rate": 8.176858379979192e-07, "loss": 0.6312, "step": 12064 }, { "epoch": 0.8493488208377332, "grad_norm": 2.5175983905792236, "learning_rate": 8.169373936551217e-07, "loss": 0.6978, "step": 12065 }, { "epoch": 0.8494192185850052, "grad_norm": 2.034388303756714, "learning_rate": 8.161892707753508e-07, "loss": 0.6578, "step": 12066 }, { "epoch": 0.8494896163322774, "grad_norm": 2.4538657665252686, "learning_rate": 8.154414693974977e-07, "loss": 0.724, "step": 12067 }, { "epoch": 0.8495600140795495, "grad_norm": 2.000366449356079, "learning_rate": 8.146939895604473e-07, "loss": 0.6413, "step": 12068 }, { "epoch": 0.8496304118268215, "grad_norm": 1.8984955549240112, "learning_rate": 8.139468313030553e-07, "loss": 0.628, "step": 12069 }, { "epoch": 0.8497008095740937, "grad_norm": 2.6452651023864746, "learning_rate": 8.131999946641706e-07, "loss": 0.531, "step": 12070 }, { "epoch": 0.8497712073213657, "grad_norm": 1.9814568758010864, "learning_rate": 8.12453479682621e-07, "loss": 0.6368, "step": 12071 }, { "epoch": 0.8498416050686378, "grad_norm": 1.9756944179534912, "learning_rate": 8.11707286397219e-07, "loss": 0.653, "step": 12072 }, { "epoch": 0.8499120028159098, "grad_norm": 2.166168212890625, "learning_rate": 8.109614148467589e-07, "loss": 0.6179, "step": 12073 }, { "epoch": 0.849982400563182, "grad_norm": 2.1702804565429688, "learning_rate": 8.10215865070023e-07, "loss": 0.6458, "step": 12074 }, { "epoch": 0.8500527983104541, "grad_norm": 2.082092761993408, "learning_rate": 8.094706371057665e-07, "loss": 0.5552, "step": 12075 }, { "epoch": 0.8501231960577261, "grad_norm": 2.423396348953247, "learning_rate": 8.0872573099274e-07, "loss": 0.7184, "step": 12076 }, { "epoch": 0.8501935938049983, "grad_norm": 1.8008294105529785, "learning_rate": 8.079811467696685e-07, "loss": 0.6618, "step": 12077 }, { "epoch": 0.8502639915522703, "grad_norm": 1.9954781532287598, "learning_rate": 8.072368844752659e-07, "loss": 0.6424, "step": 12078 }, { "epoch": 0.8503343892995424, "grad_norm": 2.2267613410949707, "learning_rate": 8.064929441482252e-07, "loss": 0.6657, "step": 12079 }, { "epoch": 0.8504047870468145, "grad_norm": 1.9550248384475708, "learning_rate": 8.057493258272273e-07, "loss": 0.6532, "step": 12080 }, { "epoch": 0.8504751847940866, "grad_norm": 2.2800028324127197, "learning_rate": 8.050060295509306e-07, "loss": 0.6215, "step": 12081 }, { "epoch": 0.8505455825413587, "grad_norm": 1.9233009815216064, "learning_rate": 8.042630553579821e-07, "loss": 0.7772, "step": 12082 }, { "epoch": 0.8506159802886307, "grad_norm": 1.6504650115966797, "learning_rate": 8.035204032870066e-07, "loss": 0.6432, "step": 12083 }, { "epoch": 0.8506863780359029, "grad_norm": 2.047360420227051, "learning_rate": 8.027780733766184e-07, "loss": 0.6523, "step": 12084 }, { "epoch": 0.850756775783175, "grad_norm": 2.1061673164367676, "learning_rate": 8.020360656654101e-07, "loss": 0.5555, "step": 12085 }, { "epoch": 0.850827173530447, "grad_norm": 1.98600435256958, "learning_rate": 8.012943801919604e-07, "loss": 0.5546, "step": 12086 }, { "epoch": 0.8508975712777191, "grad_norm": 1.9371720552444458, "learning_rate": 8.005530169948304e-07, "loss": 0.6234, "step": 12087 }, { "epoch": 0.8509679690249912, "grad_norm": 2.0547590255737305, "learning_rate": 7.998119761125638e-07, "loss": 0.6145, "step": 12088 }, { "epoch": 0.8510383667722633, "grad_norm": 2.1735448837280273, "learning_rate": 7.990712575836859e-07, "loss": 0.6687, "step": 12089 }, { "epoch": 0.8511087645195353, "grad_norm": 2.168487787246704, "learning_rate": 7.983308614467101e-07, "loss": 0.6635, "step": 12090 }, { "epoch": 0.8511791622668075, "grad_norm": 3.155900716781616, "learning_rate": 7.975907877401292e-07, "loss": 0.7475, "step": 12091 }, { "epoch": 0.8512495600140796, "grad_norm": 1.934801459312439, "learning_rate": 7.968510365024214e-07, "loss": 0.7552, "step": 12092 }, { "epoch": 0.8513199577613516, "grad_norm": 1.9198135137557983, "learning_rate": 7.961116077720453e-07, "loss": 0.6909, "step": 12093 }, { "epoch": 0.8513903555086237, "grad_norm": 1.7487915754318237, "learning_rate": 7.953725015874451e-07, "loss": 0.5616, "step": 12094 }, { "epoch": 0.8514607532558958, "grad_norm": 1.9127850532531738, "learning_rate": 7.946337179870481e-07, "loss": 0.6442, "step": 12095 }, { "epoch": 0.8515311510031679, "grad_norm": 2.07527756690979, "learning_rate": 7.938952570092631e-07, "loss": 0.6704, "step": 12096 }, { "epoch": 0.85160154875044, "grad_norm": 2.559131622314453, "learning_rate": 7.93157118692486e-07, "loss": 0.59, "step": 12097 }, { "epoch": 0.851671946497712, "grad_norm": 2.102616310119629, "learning_rate": 7.924193030750914e-07, "loss": 0.6475, "step": 12098 }, { "epoch": 0.8517423442449842, "grad_norm": 2.0279722213745117, "learning_rate": 7.916818101954389e-07, "loss": 0.6309, "step": 12099 }, { "epoch": 0.8518127419922562, "grad_norm": 1.86268150806427, "learning_rate": 7.909446400918702e-07, "loss": 0.6802, "step": 12100 }, { "epoch": 0.8518831397395283, "grad_norm": 1.8064039945602417, "learning_rate": 7.902077928027148e-07, "loss": 0.6384, "step": 12101 }, { "epoch": 0.8519535374868005, "grad_norm": 2.2994813919067383, "learning_rate": 7.894712683662785e-07, "loss": 0.634, "step": 12102 }, { "epoch": 0.8520239352340725, "grad_norm": 1.7149685621261597, "learning_rate": 7.887350668208567e-07, "loss": 0.594, "step": 12103 }, { "epoch": 0.8520943329813446, "grad_norm": 1.913367748260498, "learning_rate": 7.879991882047241e-07, "loss": 0.6428, "step": 12104 }, { "epoch": 0.8521647307286166, "grad_norm": 1.9182802438735962, "learning_rate": 7.872636325561403e-07, "loss": 0.6534, "step": 12105 }, { "epoch": 0.8522351284758888, "grad_norm": 2.1323633193969727, "learning_rate": 7.865283999133443e-07, "loss": 0.5459, "step": 12106 }, { "epoch": 0.8523055262231609, "grad_norm": 2.0607919692993164, "learning_rate": 7.857934903145661e-07, "loss": 0.5932, "step": 12107 }, { "epoch": 0.8523759239704329, "grad_norm": 1.9429184198379517, "learning_rate": 7.850589037980115e-07, "loss": 0.6726, "step": 12108 }, { "epoch": 0.8524463217177051, "grad_norm": 2.1002354621887207, "learning_rate": 7.843246404018737e-07, "loss": 0.7198, "step": 12109 }, { "epoch": 0.8525167194649771, "grad_norm": 1.9775296449661255, "learning_rate": 7.835907001643267e-07, "loss": 0.5985, "step": 12110 }, { "epoch": 0.8525871172122492, "grad_norm": 3.001347303390503, "learning_rate": 7.828570831235295e-07, "loss": 0.6357, "step": 12111 }, { "epoch": 0.8526575149595212, "grad_norm": 2.6425821781158447, "learning_rate": 7.82123789317622e-07, "loss": 0.6071, "step": 12112 }, { "epoch": 0.8527279127067934, "grad_norm": 2.197132110595703, "learning_rate": 7.813908187847314e-07, "loss": 0.6903, "step": 12113 }, { "epoch": 0.8527983104540655, "grad_norm": 2.041741371154785, "learning_rate": 7.806581715629618e-07, "loss": 0.616, "step": 12114 }, { "epoch": 0.8528687082013375, "grad_norm": 1.9825263023376465, "learning_rate": 7.799258476904091e-07, "loss": 0.5353, "step": 12115 }, { "epoch": 0.8529391059486097, "grad_norm": 2.6092278957366943, "learning_rate": 7.791938472051442e-07, "loss": 0.6239, "step": 12116 }, { "epoch": 0.8530095036958817, "grad_norm": 2.0818915367126465, "learning_rate": 7.784621701452249e-07, "loss": 0.6461, "step": 12117 }, { "epoch": 0.8530799014431538, "grad_norm": 1.8072972297668457, "learning_rate": 7.7773081654869e-07, "loss": 0.6958, "step": 12118 }, { "epoch": 0.853150299190426, "grad_norm": 1.949683666229248, "learning_rate": 7.769997864535674e-07, "loss": 0.6797, "step": 12119 }, { "epoch": 0.853220696937698, "grad_norm": 2.250246286392212, "learning_rate": 7.762690798978606e-07, "loss": 0.5407, "step": 12120 }, { "epoch": 0.8532910946849701, "grad_norm": 1.5520117282867432, "learning_rate": 7.755386969195618e-07, "loss": 0.5656, "step": 12121 }, { "epoch": 0.8533614924322421, "grad_norm": 2.107489585876465, "learning_rate": 7.748086375566429e-07, "loss": 0.6246, "step": 12122 }, { "epoch": 0.8534318901795143, "grad_norm": 2.381615400314331, "learning_rate": 7.740789018470617e-07, "loss": 0.6622, "step": 12123 }, { "epoch": 0.8535022879267864, "grad_norm": 1.83864426612854, "learning_rate": 7.733494898287551e-07, "loss": 0.683, "step": 12124 }, { "epoch": 0.8535726856740584, "grad_norm": 1.8288240432739258, "learning_rate": 7.726204015396489e-07, "loss": 0.668, "step": 12125 }, { "epoch": 0.8536430834213306, "grad_norm": 1.9510787725448608, "learning_rate": 7.718916370176468e-07, "loss": 0.5597, "step": 12126 }, { "epoch": 0.8537134811686026, "grad_norm": 2.4010512828826904, "learning_rate": 7.711631963006405e-07, "loss": 0.65, "step": 12127 }, { "epoch": 0.8537838789158747, "grad_norm": 1.8233548402786255, "learning_rate": 7.704350794265001e-07, "loss": 0.6041, "step": 12128 }, { "epoch": 0.8538542766631467, "grad_norm": 2.15811824798584, "learning_rate": 7.697072864330824e-07, "loss": 0.6476, "step": 12129 }, { "epoch": 0.8539246744104189, "grad_norm": 2.0297179222106934, "learning_rate": 7.689798173582239e-07, "loss": 0.6697, "step": 12130 }, { "epoch": 0.853995072157691, "grad_norm": 2.068207263946533, "learning_rate": 7.68252672239749e-07, "loss": 0.7319, "step": 12131 }, { "epoch": 0.854065469904963, "grad_norm": 1.9194790124893188, "learning_rate": 7.675258511154608e-07, "loss": 0.7282, "step": 12132 }, { "epoch": 0.8541358676522351, "grad_norm": 2.370466709136963, "learning_rate": 7.667993540231485e-07, "loss": 0.6385, "step": 12133 }, { "epoch": 0.8542062653995072, "grad_norm": 2.166938304901123, "learning_rate": 7.660731810005834e-07, "loss": 0.5815, "step": 12134 }, { "epoch": 0.8542766631467793, "grad_norm": 1.7950276136398315, "learning_rate": 7.653473320855191e-07, "loss": 0.5607, "step": 12135 }, { "epoch": 0.8543470608940514, "grad_norm": 1.8224844932556152, "learning_rate": 7.646218073156926e-07, "loss": 0.6756, "step": 12136 }, { "epoch": 0.8544174586413235, "grad_norm": 2.3032443523406982, "learning_rate": 7.638966067288264e-07, "loss": 0.5057, "step": 12137 }, { "epoch": 0.8544878563885956, "grad_norm": 1.8148366212844849, "learning_rate": 7.631717303626219e-07, "loss": 0.6903, "step": 12138 }, { "epoch": 0.8545582541358676, "grad_norm": 2.009895086288452, "learning_rate": 7.624471782547694e-07, "loss": 0.6768, "step": 12139 }, { "epoch": 0.8546286518831397, "grad_norm": 1.8870183229446411, "learning_rate": 7.617229504429365e-07, "loss": 0.6643, "step": 12140 }, { "epoch": 0.8546990496304119, "grad_norm": 1.9002747535705566, "learning_rate": 7.609990469647775e-07, "loss": 0.5698, "step": 12141 }, { "epoch": 0.8547694473776839, "grad_norm": 1.7270288467407227, "learning_rate": 7.602754678579266e-07, "loss": 0.6369, "step": 12142 }, { "epoch": 0.854839845124956, "grad_norm": 2.180175542831421, "learning_rate": 7.595522131600073e-07, "loss": 0.7091, "step": 12143 }, { "epoch": 0.854910242872228, "grad_norm": 1.936262607574463, "learning_rate": 7.588292829086183e-07, "loss": 0.6355, "step": 12144 }, { "epoch": 0.8549806406195002, "grad_norm": 2.027648448944092, "learning_rate": 7.581066771413486e-07, "loss": 0.6118, "step": 12145 }, { "epoch": 0.8550510383667722, "grad_norm": 2.3917951583862305, "learning_rate": 7.573843958957657e-07, "loss": 0.6667, "step": 12146 }, { "epoch": 0.8551214361140443, "grad_norm": 1.8411661386489868, "learning_rate": 7.56662439209421e-07, "loss": 0.6319, "step": 12147 }, { "epoch": 0.8551918338613165, "grad_norm": 1.8412338495254517, "learning_rate": 7.559408071198512e-07, "loss": 0.5867, "step": 12148 }, { "epoch": 0.8552622316085885, "grad_norm": 2.2204627990722656, "learning_rate": 7.55219499664572e-07, "loss": 0.5446, "step": 12149 }, { "epoch": 0.8553326293558606, "grad_norm": 1.9465546607971191, "learning_rate": 7.544985168810882e-07, "loss": 0.6065, "step": 12150 }, { "epoch": 0.8554030271031327, "grad_norm": 1.9441288709640503, "learning_rate": 7.537778588068834e-07, "loss": 0.7073, "step": 12151 }, { "epoch": 0.8554734248504048, "grad_norm": 2.0028257369995117, "learning_rate": 7.530575254794245e-07, "loss": 0.5664, "step": 12152 }, { "epoch": 0.8555438225976769, "grad_norm": 2.194694995880127, "learning_rate": 7.523375169361602e-07, "loss": 0.7167, "step": 12153 }, { "epoch": 0.8556142203449489, "grad_norm": 1.8612339496612549, "learning_rate": 7.516178332145286e-07, "loss": 0.689, "step": 12154 }, { "epoch": 0.8556846180922211, "grad_norm": 2.12447452545166, "learning_rate": 7.508984743519433e-07, "loss": 0.6423, "step": 12155 }, { "epoch": 0.8557550158394931, "grad_norm": 1.9065877199172974, "learning_rate": 7.501794403858075e-07, "loss": 0.7028, "step": 12156 }, { "epoch": 0.8558254135867652, "grad_norm": 1.8404210805892944, "learning_rate": 7.494607313535008e-07, "loss": 0.5846, "step": 12157 }, { "epoch": 0.8558958113340374, "grad_norm": 1.981136679649353, "learning_rate": 7.487423472923949e-07, "loss": 0.615, "step": 12158 }, { "epoch": 0.8559662090813094, "grad_norm": 2.107985258102417, "learning_rate": 7.480242882398325e-07, "loss": 0.7377, "step": 12159 }, { "epoch": 0.8560366068285815, "grad_norm": 1.8776301145553589, "learning_rate": 7.47306554233151e-07, "loss": 0.5529, "step": 12160 }, { "epoch": 0.8561070045758535, "grad_norm": 2.342283248901367, "learning_rate": 7.465891453096633e-07, "loss": 0.5709, "step": 12161 }, { "epoch": 0.8561774023231257, "grad_norm": 2.122431755065918, "learning_rate": 7.458720615066706e-07, "loss": 0.6733, "step": 12162 }, { "epoch": 0.8562478000703978, "grad_norm": 2.1144227981567383, "learning_rate": 7.451553028614521e-07, "loss": 0.6225, "step": 12163 }, { "epoch": 0.8563181978176698, "grad_norm": 2.1301469802856445, "learning_rate": 7.444388694112766e-07, "loss": 0.6415, "step": 12164 }, { "epoch": 0.856388595564942, "grad_norm": 1.9728256464004517, "learning_rate": 7.437227611933877e-07, "loss": 0.7061, "step": 12165 }, { "epoch": 0.856458993312214, "grad_norm": 2.5734989643096924, "learning_rate": 7.430069782450197e-07, "loss": 0.6341, "step": 12166 }, { "epoch": 0.8565293910594861, "grad_norm": 1.871416687965393, "learning_rate": 7.422915206033835e-07, "loss": 0.6418, "step": 12167 }, { "epoch": 0.8565997888067581, "grad_norm": 2.025885820388794, "learning_rate": 7.415763883056801e-07, "loss": 0.7255, "step": 12168 }, { "epoch": 0.8566701865540303, "grad_norm": 1.7988250255584717, "learning_rate": 7.408615813890872e-07, "loss": 0.7015, "step": 12169 }, { "epoch": 0.8567405843013024, "grad_norm": 2.2621381282806396, "learning_rate": 7.401470998907721e-07, "loss": 0.6372, "step": 12170 }, { "epoch": 0.8568109820485744, "grad_norm": 2.0480589866638184, "learning_rate": 7.394329438478751e-07, "loss": 0.7108, "step": 12171 }, { "epoch": 0.8568813797958466, "grad_norm": 2.332232713699341, "learning_rate": 7.387191132975312e-07, "loss": 0.6493, "step": 12172 }, { "epoch": 0.8569517775431186, "grad_norm": 2.340855836868286, "learning_rate": 7.380056082768495e-07, "loss": 0.6763, "step": 12173 }, { "epoch": 0.8570221752903907, "grad_norm": 1.9872361421585083, "learning_rate": 7.372924288229292e-07, "loss": 0.5821, "step": 12174 }, { "epoch": 0.8570925730376628, "grad_norm": 2.1895592212677, "learning_rate": 7.365795749728452e-07, "loss": 0.6568, "step": 12175 }, { "epoch": 0.8571629707849349, "grad_norm": 1.952144980430603, "learning_rate": 7.358670467636648e-07, "loss": 0.7146, "step": 12176 }, { "epoch": 0.857233368532207, "grad_norm": 2.3143491744995117, "learning_rate": 7.351548442324272e-07, "loss": 0.7156, "step": 12177 }, { "epoch": 0.857303766279479, "grad_norm": 2.196525812149048, "learning_rate": 7.344429674161647e-07, "loss": 0.6681, "step": 12178 }, { "epoch": 0.8573741640267512, "grad_norm": 1.8202887773513794, "learning_rate": 7.337314163518847e-07, "loss": 0.5853, "step": 12179 }, { "epoch": 0.8574445617740233, "grad_norm": 1.9669896364212036, "learning_rate": 7.330201910765852e-07, "loss": 0.666, "step": 12180 }, { "epoch": 0.8575149595212953, "grad_norm": 2.899357795715332, "learning_rate": 7.323092916272395e-07, "loss": 0.6569, "step": 12181 }, { "epoch": 0.8575853572685674, "grad_norm": 2.0070855617523193, "learning_rate": 7.315987180408139e-07, "loss": 0.6519, "step": 12182 }, { "epoch": 0.8576557550158395, "grad_norm": 1.6909377574920654, "learning_rate": 7.308884703542448e-07, "loss": 0.5464, "step": 12183 }, { "epoch": 0.8577261527631116, "grad_norm": 1.6118720769882202, "learning_rate": 7.30178548604463e-07, "loss": 0.6783, "step": 12184 }, { "epoch": 0.8577965505103836, "grad_norm": 1.7495582103729248, "learning_rate": 7.294689528283753e-07, "loss": 0.7096, "step": 12185 }, { "epoch": 0.8578669482576557, "grad_norm": 2.192085027694702, "learning_rate": 7.287596830628774e-07, "loss": 0.7411, "step": 12186 }, { "epoch": 0.8579373460049279, "grad_norm": 1.9461082220077515, "learning_rate": 7.280507393448414e-07, "loss": 0.5938, "step": 12187 }, { "epoch": 0.8580077437521999, "grad_norm": 2.347353935241699, "learning_rate": 7.273421217111294e-07, "loss": 0.6025, "step": 12188 }, { "epoch": 0.858078141499472, "grad_norm": 2.173781156539917, "learning_rate": 7.266338301985818e-07, "loss": 0.6357, "step": 12189 }, { "epoch": 0.8581485392467441, "grad_norm": 1.988889217376709, "learning_rate": 7.259258648440216e-07, "loss": 0.701, "step": 12190 }, { "epoch": 0.8582189369940162, "grad_norm": 1.7890304327011108, "learning_rate": 7.252182256842574e-07, "loss": 0.6314, "step": 12191 }, { "epoch": 0.8582893347412883, "grad_norm": 1.8918514251708984, "learning_rate": 7.245109127560812e-07, "loss": 0.6431, "step": 12192 }, { "epoch": 0.8583597324885603, "grad_norm": 2.151613712310791, "learning_rate": 7.23803926096265e-07, "loss": 0.5764, "step": 12193 }, { "epoch": 0.8584301302358325, "grad_norm": 1.8541706800460815, "learning_rate": 7.230972657415683e-07, "loss": 0.71, "step": 12194 }, { "epoch": 0.8585005279831045, "grad_norm": 1.838590145111084, "learning_rate": 7.223909317287295e-07, "loss": 0.7138, "step": 12195 }, { "epoch": 0.8585709257303766, "grad_norm": 1.9017055034637451, "learning_rate": 7.216849240944694e-07, "loss": 0.6606, "step": 12196 }, { "epoch": 0.8586413234776488, "grad_norm": 1.9152257442474365, "learning_rate": 7.209792428754985e-07, "loss": 0.6584, "step": 12197 }, { "epoch": 0.8587117212249208, "grad_norm": 1.9669020175933838, "learning_rate": 7.202738881085016e-07, "loss": 0.653, "step": 12198 }, { "epoch": 0.8587821189721929, "grad_norm": 2.9137351512908936, "learning_rate": 7.195688598301533e-07, "loss": 0.5998, "step": 12199 }, { "epoch": 0.8588525167194649, "grad_norm": 2.199464797973633, "learning_rate": 7.188641580771086e-07, "loss": 0.6061, "step": 12200 }, { "epoch": 0.8589229144667371, "grad_norm": 2.0012755393981934, "learning_rate": 7.181597828860046e-07, "loss": 0.6659, "step": 12201 }, { "epoch": 0.8589933122140091, "grad_norm": 1.8894436359405518, "learning_rate": 7.17455734293461e-07, "loss": 0.7565, "step": 12202 }, { "epoch": 0.8590637099612812, "grad_norm": 2.1361453533172607, "learning_rate": 7.167520123360849e-07, "loss": 0.5952, "step": 12203 }, { "epoch": 0.8591341077085534, "grad_norm": 3.7478480339050293, "learning_rate": 7.160486170504612e-07, "loss": 0.6556, "step": 12204 }, { "epoch": 0.8592045054558254, "grad_norm": 2.9292428493499756, "learning_rate": 7.153455484731626e-07, "loss": 0.6806, "step": 12205 }, { "epoch": 0.8592749032030975, "grad_norm": 1.8225903511047363, "learning_rate": 7.146428066407403e-07, "loss": 0.7101, "step": 12206 }, { "epoch": 0.8593453009503695, "grad_norm": 2.573578119277954, "learning_rate": 7.139403915897308e-07, "loss": 0.6031, "step": 12207 }, { "epoch": 0.8594156986976417, "grad_norm": 1.718753695487976, "learning_rate": 7.132383033566513e-07, "loss": 0.6527, "step": 12208 }, { "epoch": 0.8594860964449138, "grad_norm": 2.025161027908325, "learning_rate": 7.125365419780084e-07, "loss": 0.618, "step": 12209 }, { "epoch": 0.8595564941921858, "grad_norm": 1.7555241584777832, "learning_rate": 7.118351074902835e-07, "loss": 0.5351, "step": 12210 }, { "epoch": 0.859626891939458, "grad_norm": 1.7950536012649536, "learning_rate": 7.11133999929947e-07, "loss": 0.632, "step": 12211 }, { "epoch": 0.85969728968673, "grad_norm": 1.8013205528259277, "learning_rate": 7.104332193334499e-07, "loss": 0.7026, "step": 12212 }, { "epoch": 0.8597676874340021, "grad_norm": 2.7835729122161865, "learning_rate": 7.097327657372259e-07, "loss": 0.584, "step": 12213 }, { "epoch": 0.8598380851812742, "grad_norm": 2.832486152648926, "learning_rate": 7.090326391776904e-07, "loss": 0.6998, "step": 12214 }, { "epoch": 0.8599084829285463, "grad_norm": 2.318272352218628, "learning_rate": 7.083328396912468e-07, "loss": 0.6422, "step": 12215 }, { "epoch": 0.8599788806758184, "grad_norm": 1.7277363538742065, "learning_rate": 7.076333673142761e-07, "loss": 0.6709, "step": 12216 }, { "epoch": 0.8600492784230904, "grad_norm": 2.106096029281616, "learning_rate": 7.069342220831462e-07, "loss": 0.7015, "step": 12217 }, { "epoch": 0.8601196761703626, "grad_norm": 2.198626756668091, "learning_rate": 7.062354040342063e-07, "loss": 0.6123, "step": 12218 }, { "epoch": 0.8601900739176347, "grad_norm": 1.8306388854980469, "learning_rate": 7.055369132037866e-07, "loss": 0.5889, "step": 12219 }, { "epoch": 0.8602604716649067, "grad_norm": 2.060499668121338, "learning_rate": 7.048387496282031e-07, "loss": 0.682, "step": 12220 }, { "epoch": 0.8603308694121788, "grad_norm": 1.7447426319122314, "learning_rate": 7.041409133437544e-07, "loss": 0.7, "step": 12221 }, { "epoch": 0.8604012671594509, "grad_norm": 1.7603721618652344, "learning_rate": 7.034434043867212e-07, "loss": 0.5735, "step": 12222 }, { "epoch": 0.860471664906723, "grad_norm": 1.9317619800567627, "learning_rate": 7.027462227933686e-07, "loss": 0.6547, "step": 12223 }, { "epoch": 0.860542062653995, "grad_norm": 2.7453114986419678, "learning_rate": 7.020493685999433e-07, "loss": 0.6795, "step": 12224 }, { "epoch": 0.8606124604012672, "grad_norm": 2.217226505279541, "learning_rate": 7.013528418426749e-07, "loss": 0.7533, "step": 12225 }, { "epoch": 0.8606828581485393, "grad_norm": 2.8593389987945557, "learning_rate": 7.006566425577755e-07, "loss": 0.5366, "step": 12226 }, { "epoch": 0.8607532558958113, "grad_norm": 1.5845566987991333, "learning_rate": 6.999607707814435e-07, "loss": 0.5539, "step": 12227 }, { "epoch": 0.8608236536430834, "grad_norm": 2.0742924213409424, "learning_rate": 6.99265226549855e-07, "loss": 0.7766, "step": 12228 }, { "epoch": 0.8608940513903555, "grad_norm": 2.3513288497924805, "learning_rate": 6.985700098991755e-07, "loss": 0.7099, "step": 12229 }, { "epoch": 0.8609644491376276, "grad_norm": 3.824047565460205, "learning_rate": 6.978751208655482e-07, "loss": 0.7542, "step": 12230 }, { "epoch": 0.8610348468848997, "grad_norm": 2.351215362548828, "learning_rate": 6.971805594851002e-07, "loss": 0.6761, "step": 12231 }, { "epoch": 0.8611052446321718, "grad_norm": 3.058062791824341, "learning_rate": 6.964863257939425e-07, "loss": 0.6762, "step": 12232 }, { "epoch": 0.8611756423794439, "grad_norm": 2.221510410308838, "learning_rate": 6.957924198281704e-07, "loss": 0.7208, "step": 12233 }, { "epoch": 0.8612460401267159, "grad_norm": 1.9123719930648804, "learning_rate": 6.950988416238586e-07, "loss": 0.5892, "step": 12234 }, { "epoch": 0.861316437873988, "grad_norm": 1.9603482484817505, "learning_rate": 6.944055912170695e-07, "loss": 0.6283, "step": 12235 }, { "epoch": 0.8613868356212602, "grad_norm": 2.205613136291504, "learning_rate": 6.937126686438444e-07, "loss": 0.6198, "step": 12236 }, { "epoch": 0.8614572333685322, "grad_norm": 2.24139666557312, "learning_rate": 6.930200739402093e-07, "loss": 0.6363, "step": 12237 }, { "epoch": 0.8615276311158043, "grad_norm": 2.237309455871582, "learning_rate": 6.923278071421704e-07, "loss": 0.6405, "step": 12238 }, { "epoch": 0.8615980288630763, "grad_norm": 1.9342663288116455, "learning_rate": 6.916358682857235e-07, "loss": 0.6623, "step": 12239 }, { "epoch": 0.8616684266103485, "grad_norm": 1.7722220420837402, "learning_rate": 6.909442574068392e-07, "loss": 0.7095, "step": 12240 }, { "epoch": 0.8617388243576205, "grad_norm": 1.8531336784362793, "learning_rate": 6.90252974541478e-07, "loss": 0.621, "step": 12241 }, { "epoch": 0.8618092221048926, "grad_norm": 1.9714322090148926, "learning_rate": 6.895620197255794e-07, "loss": 0.7039, "step": 12242 }, { "epoch": 0.8618796198521648, "grad_norm": 2.0464155673980713, "learning_rate": 6.88871392995066e-07, "loss": 0.6278, "step": 12243 }, { "epoch": 0.8619500175994368, "grad_norm": 1.8525066375732422, "learning_rate": 6.881810943858433e-07, "loss": 0.8225, "step": 12244 }, { "epoch": 0.8620204153467089, "grad_norm": 2.530982494354248, "learning_rate": 6.874911239338025e-07, "loss": 0.6432, "step": 12245 }, { "epoch": 0.862090813093981, "grad_norm": 1.8591593503952026, "learning_rate": 6.868014816748157e-07, "loss": 0.6734, "step": 12246 }, { "epoch": 0.8621612108412531, "grad_norm": 2.10263729095459, "learning_rate": 6.861121676447377e-07, "loss": 0.6079, "step": 12247 }, { "epoch": 0.8622316085885252, "grad_norm": 2.0360233783721924, "learning_rate": 6.854231818794063e-07, "loss": 0.6553, "step": 12248 }, { "epoch": 0.8623020063357972, "grad_norm": 2.0961086750030518, "learning_rate": 6.847345244146414e-07, "loss": 0.6672, "step": 12249 }, { "epoch": 0.8623724040830694, "grad_norm": 1.8538607358932495, "learning_rate": 6.84046195286249e-07, "loss": 0.5681, "step": 12250 }, { "epoch": 0.8624428018303414, "grad_norm": 1.9557244777679443, "learning_rate": 6.833581945300139e-07, "loss": 0.6575, "step": 12251 }, { "epoch": 0.8625131995776135, "grad_norm": 1.946211814880371, "learning_rate": 6.826705221817086e-07, "loss": 0.6134, "step": 12252 }, { "epoch": 0.8625835973248857, "grad_norm": 2.5263001918792725, "learning_rate": 6.819831782770832e-07, "loss": 0.7783, "step": 12253 }, { "epoch": 0.8626539950721577, "grad_norm": 1.810930609703064, "learning_rate": 6.812961628518751e-07, "loss": 0.6221, "step": 12254 }, { "epoch": 0.8627243928194298, "grad_norm": 1.719546914100647, "learning_rate": 6.806094759418009e-07, "loss": 0.6943, "step": 12255 }, { "epoch": 0.8627947905667018, "grad_norm": 1.71648371219635, "learning_rate": 6.799231175825638e-07, "loss": 0.5387, "step": 12256 }, { "epoch": 0.862865188313974, "grad_norm": 2.33948016166687, "learning_rate": 6.792370878098464e-07, "loss": 0.7657, "step": 12257 }, { "epoch": 0.862935586061246, "grad_norm": 1.9201332330703735, "learning_rate": 6.785513866593191e-07, "loss": 0.6817, "step": 12258 }, { "epoch": 0.8630059838085181, "grad_norm": 1.9587985277175903, "learning_rate": 6.778660141666294e-07, "loss": 0.6202, "step": 12259 }, { "epoch": 0.8630763815557903, "grad_norm": 2.7265472412109375, "learning_rate": 6.771809703674125e-07, "loss": 0.7218, "step": 12260 }, { "epoch": 0.8631467793030623, "grad_norm": 1.7775119543075562, "learning_rate": 6.764962552972811e-07, "loss": 0.6527, "step": 12261 }, { "epoch": 0.8632171770503344, "grad_norm": 1.885414958000183, "learning_rate": 6.758118689918378e-07, "loss": 0.7085, "step": 12262 }, { "epoch": 0.8632875747976064, "grad_norm": 1.796035885810852, "learning_rate": 6.751278114866615e-07, "loss": 0.6002, "step": 12263 }, { "epoch": 0.8633579725448786, "grad_norm": 1.932136058807373, "learning_rate": 6.744440828173194e-07, "loss": 0.6878, "step": 12264 }, { "epoch": 0.8634283702921507, "grad_norm": 2.3438398838043213, "learning_rate": 6.737606830193575e-07, "loss": 0.7259, "step": 12265 }, { "epoch": 0.8634987680394227, "grad_norm": 1.8484212160110474, "learning_rate": 6.730776121283091e-07, "loss": 0.6813, "step": 12266 }, { "epoch": 0.8635691657866948, "grad_norm": 1.73306143283844, "learning_rate": 6.723948701796838e-07, "loss": 0.6487, "step": 12267 }, { "epoch": 0.8636395635339669, "grad_norm": 1.9522175788879395, "learning_rate": 6.71712457208981e-07, "loss": 0.6972, "step": 12268 }, { "epoch": 0.863709961281239, "grad_norm": 1.8504985570907593, "learning_rate": 6.710303732516772e-07, "loss": 0.5603, "step": 12269 }, { "epoch": 0.8637803590285111, "grad_norm": 1.7786047458648682, "learning_rate": 6.70348618343237e-07, "loss": 0.7636, "step": 12270 }, { "epoch": 0.8638507567757832, "grad_norm": 2.256232738494873, "learning_rate": 6.696671925191035e-07, "loss": 0.7214, "step": 12271 }, { "epoch": 0.8639211545230553, "grad_norm": 1.9394261837005615, "learning_rate": 6.689860958147089e-07, "loss": 0.5906, "step": 12272 }, { "epoch": 0.8639915522703273, "grad_norm": 1.7506892681121826, "learning_rate": 6.68305328265458e-07, "loss": 0.6271, "step": 12273 }, { "epoch": 0.8640619500175994, "grad_norm": 2.4459354877471924, "learning_rate": 6.676248899067486e-07, "loss": 0.5533, "step": 12274 }, { "epoch": 0.8641323477648716, "grad_norm": 1.8939160108566284, "learning_rate": 6.669447807739554e-07, "loss": 0.5894, "step": 12275 }, { "epoch": 0.8642027455121436, "grad_norm": 2.035979747772217, "learning_rate": 6.662650009024391e-07, "loss": 0.5725, "step": 12276 }, { "epoch": 0.8642731432594157, "grad_norm": 1.753029704093933, "learning_rate": 6.655855503275408e-07, "loss": 0.5431, "step": 12277 }, { "epoch": 0.8643435410066878, "grad_norm": 24.344524383544922, "learning_rate": 6.649064290845891e-07, "loss": 0.5504, "step": 12278 }, { "epoch": 0.8644139387539599, "grad_norm": 2.0214591026306152, "learning_rate": 6.642276372088864e-07, "loss": 0.5853, "step": 12279 }, { "epoch": 0.8644843365012319, "grad_norm": 1.9576324224472046, "learning_rate": 6.635491747357281e-07, "loss": 0.494, "step": 12280 }, { "epoch": 0.864554734248504, "grad_norm": 1.9089299440383911, "learning_rate": 6.628710417003863e-07, "loss": 0.6052, "step": 12281 }, { "epoch": 0.8646251319957762, "grad_norm": 1.6759536266326904, "learning_rate": 6.621932381381194e-07, "loss": 0.6235, "step": 12282 }, { "epoch": 0.8646955297430482, "grad_norm": 1.6584700345993042, "learning_rate": 6.615157640841641e-07, "loss": 0.5806, "step": 12283 }, { "epoch": 0.8647659274903203, "grad_norm": 2.0703604221343994, "learning_rate": 6.608386195737479e-07, "loss": 0.6998, "step": 12284 }, { "epoch": 0.8648363252375924, "grad_norm": 2.2783470153808594, "learning_rate": 6.601618046420697e-07, "loss": 0.5912, "step": 12285 }, { "epoch": 0.8649067229848645, "grad_norm": 1.6384145021438599, "learning_rate": 6.594853193243232e-07, "loss": 0.5489, "step": 12286 }, { "epoch": 0.8649771207321366, "grad_norm": 2.1358044147491455, "learning_rate": 6.588091636556753e-07, "loss": 0.7793, "step": 12287 }, { "epoch": 0.8650475184794086, "grad_norm": 2.850532293319702, "learning_rate": 6.581333376712832e-07, "loss": 0.6101, "step": 12288 }, { "epoch": 0.8651179162266808, "grad_norm": 1.9525861740112305, "learning_rate": 6.574578414062811e-07, "loss": 0.6478, "step": 12289 }, { "epoch": 0.8651883139739528, "grad_norm": 1.7365671396255493, "learning_rate": 6.567826748957927e-07, "loss": 0.6787, "step": 12290 }, { "epoch": 0.8652587117212249, "grad_norm": 2.0831897258758545, "learning_rate": 6.561078381749152e-07, "loss": 0.6971, "step": 12291 }, { "epoch": 0.8653291094684971, "grad_norm": 2.1527087688446045, "learning_rate": 6.554333312787378e-07, "loss": 0.6744, "step": 12292 }, { "epoch": 0.8653995072157691, "grad_norm": 1.6069968938827515, "learning_rate": 6.54759154242326e-07, "loss": 0.6252, "step": 12293 }, { "epoch": 0.8654699049630412, "grad_norm": 2.120293617248535, "learning_rate": 6.540853071007341e-07, "loss": 0.6082, "step": 12294 }, { "epoch": 0.8655403027103132, "grad_norm": 1.8694043159484863, "learning_rate": 6.534117898889932e-07, "loss": 0.6347, "step": 12295 }, { "epoch": 0.8656107004575854, "grad_norm": 1.883455514907837, "learning_rate": 6.527386026421219e-07, "loss": 0.6206, "step": 12296 }, { "epoch": 0.8656810982048574, "grad_norm": 1.9862228631973267, "learning_rate": 6.520657453951188e-07, "loss": 0.6563, "step": 12297 }, { "epoch": 0.8657514959521295, "grad_norm": 2.9440689086914062, "learning_rate": 6.513932181829658e-07, "loss": 0.5369, "step": 12298 }, { "epoch": 0.8658218936994017, "grad_norm": 1.6704999208450317, "learning_rate": 6.507210210406305e-07, "loss": 0.486, "step": 12299 }, { "epoch": 0.8658922914466737, "grad_norm": 1.7548023462295532, "learning_rate": 6.500491540030582e-07, "loss": 0.5713, "step": 12300 }, { "epoch": 0.8659626891939458, "grad_norm": 1.8087455034255981, "learning_rate": 6.493776171051817e-07, "loss": 0.6688, "step": 12301 }, { "epoch": 0.8660330869412178, "grad_norm": 2.422179937362671, "learning_rate": 6.487064103819152e-07, "loss": 0.6866, "step": 12302 }, { "epoch": 0.86610348468849, "grad_norm": 1.82688570022583, "learning_rate": 6.480355338681547e-07, "loss": 0.5653, "step": 12303 }, { "epoch": 0.8661738824357621, "grad_norm": 2.292933702468872, "learning_rate": 6.473649875987773e-07, "loss": 0.6276, "step": 12304 }, { "epoch": 0.8662442801830341, "grad_norm": 2.2834343910217285, "learning_rate": 6.466947716086495e-07, "loss": 0.5951, "step": 12305 }, { "epoch": 0.8663146779303063, "grad_norm": 2.302008867263794, "learning_rate": 6.460248859326126e-07, "loss": 0.6537, "step": 12306 }, { "epoch": 0.8663850756775783, "grad_norm": 2.095219612121582, "learning_rate": 6.453553306054981e-07, "loss": 0.5673, "step": 12307 }, { "epoch": 0.8664554734248504, "grad_norm": 2.066807508468628, "learning_rate": 6.446861056621147e-07, "loss": 0.6117, "step": 12308 }, { "epoch": 0.8665258711721225, "grad_norm": 2.1426174640655518, "learning_rate": 6.440172111372565e-07, "loss": 0.6145, "step": 12309 }, { "epoch": 0.8665962689193946, "grad_norm": 1.8237351179122925, "learning_rate": 6.43348647065699e-07, "loss": 0.6372, "step": 12310 }, { "epoch": 0.8666666666666667, "grad_norm": 2.8520400524139404, "learning_rate": 6.426804134822034e-07, "loss": 0.6482, "step": 12311 }, { "epoch": 0.8667370644139387, "grad_norm": 2.030606746673584, "learning_rate": 6.420125104215087e-07, "loss": 0.6361, "step": 12312 }, { "epoch": 0.8668074621612109, "grad_norm": 1.8557727336883545, "learning_rate": 6.413449379183438e-07, "loss": 0.6086, "step": 12313 }, { "epoch": 0.866877859908483, "grad_norm": 1.845473289489746, "learning_rate": 6.406776960074139e-07, "loss": 0.6118, "step": 12314 }, { "epoch": 0.866948257655755, "grad_norm": 1.9028384685516357, "learning_rate": 6.4001078472341e-07, "loss": 0.7202, "step": 12315 }, { "epoch": 0.8670186554030271, "grad_norm": 1.9366062879562378, "learning_rate": 6.393442041010039e-07, "loss": 0.6053, "step": 12316 }, { "epoch": 0.8670890531502992, "grad_norm": 4.0813374519348145, "learning_rate": 6.386779541748545e-07, "loss": 0.6177, "step": 12317 }, { "epoch": 0.8671594508975713, "grad_norm": 1.9982905387878418, "learning_rate": 6.380120349795982e-07, "loss": 0.6814, "step": 12318 }, { "epoch": 0.8672298486448433, "grad_norm": 1.788461685180664, "learning_rate": 6.373464465498594e-07, "loss": 0.6626, "step": 12319 }, { "epoch": 0.8673002463921154, "grad_norm": 1.9240549802780151, "learning_rate": 6.366811889202403e-07, "loss": 0.6193, "step": 12320 }, { "epoch": 0.8673706441393876, "grad_norm": 1.9687443971633911, "learning_rate": 6.3601626212533e-07, "loss": 0.6119, "step": 12321 }, { "epoch": 0.8674410418866596, "grad_norm": 2.328357219696045, "learning_rate": 6.353516661996969e-07, "loss": 0.6782, "step": 12322 }, { "epoch": 0.8675114396339317, "grad_norm": 1.6942709684371948, "learning_rate": 6.346874011778951e-07, "loss": 0.6259, "step": 12323 }, { "epoch": 0.8675818373812038, "grad_norm": 2.30438494682312, "learning_rate": 6.340234670944597e-07, "loss": 0.7025, "step": 12324 }, { "epoch": 0.8676522351284759, "grad_norm": 1.968029499053955, "learning_rate": 6.333598639839106e-07, "loss": 0.6074, "step": 12325 }, { "epoch": 0.867722632875748, "grad_norm": 1.9899897575378418, "learning_rate": 6.326965918807479e-07, "loss": 0.7077, "step": 12326 }, { "epoch": 0.86779303062302, "grad_norm": 1.8205324411392212, "learning_rate": 6.320336508194558e-07, "loss": 0.6521, "step": 12327 }, { "epoch": 0.8678634283702922, "grad_norm": 1.8948554992675781, "learning_rate": 6.31371040834501e-07, "loss": 0.6306, "step": 12328 }, { "epoch": 0.8679338261175642, "grad_norm": 1.7037118673324585, "learning_rate": 6.30708761960334e-07, "loss": 0.5964, "step": 12329 }, { "epoch": 0.8680042238648363, "grad_norm": 1.9155439138412476, "learning_rate": 6.300468142313862e-07, "loss": 0.6721, "step": 12330 }, { "epoch": 0.8680746216121085, "grad_norm": 2.0127551555633545, "learning_rate": 6.293851976820736e-07, "loss": 0.6299, "step": 12331 }, { "epoch": 0.8681450193593805, "grad_norm": 1.697637915611267, "learning_rate": 6.28723912346795e-07, "loss": 0.5587, "step": 12332 }, { "epoch": 0.8682154171066526, "grad_norm": 1.9747779369354248, "learning_rate": 6.280629582599309e-07, "loss": 0.7881, "step": 12333 }, { "epoch": 0.8682858148539246, "grad_norm": 1.8473942279815674, "learning_rate": 6.274023354558423e-07, "loss": 0.6125, "step": 12334 }, { "epoch": 0.8683562126011968, "grad_norm": 1.9317309856414795, "learning_rate": 6.26742043968879e-07, "loss": 0.7428, "step": 12335 }, { "epoch": 0.8684266103484688, "grad_norm": 1.9018001556396484, "learning_rate": 6.260820838333678e-07, "loss": 0.6234, "step": 12336 }, { "epoch": 0.8684970080957409, "grad_norm": 1.9823906421661377, "learning_rate": 6.254224550836226e-07, "loss": 0.634, "step": 12337 }, { "epoch": 0.8685674058430131, "grad_norm": 2.0588581562042236, "learning_rate": 6.247631577539368e-07, "loss": 0.6893, "step": 12338 }, { "epoch": 0.8686378035902851, "grad_norm": 2.0016465187072754, "learning_rate": 6.241041918785887e-07, "loss": 0.6497, "step": 12339 }, { "epoch": 0.8687082013375572, "grad_norm": 1.9175257682800293, "learning_rate": 6.234455574918366e-07, "loss": 0.6145, "step": 12340 }, { "epoch": 0.8687785990848292, "grad_norm": 2.6499195098876953, "learning_rate": 6.227872546279268e-07, "loss": 0.7077, "step": 12341 }, { "epoch": 0.8688489968321014, "grad_norm": 2.0057642459869385, "learning_rate": 6.221292833210814e-07, "loss": 0.5858, "step": 12342 }, { "epoch": 0.8689193945793735, "grad_norm": 1.6899651288986206, "learning_rate": 6.214716436055115e-07, "loss": 0.5702, "step": 12343 }, { "epoch": 0.8689897923266455, "grad_norm": 1.6656993627548218, "learning_rate": 6.208143355154082e-07, "loss": 0.6558, "step": 12344 }, { "epoch": 0.8690601900739177, "grad_norm": 1.9014239311218262, "learning_rate": 6.201573590849438e-07, "loss": 0.6759, "step": 12345 }, { "epoch": 0.8691305878211897, "grad_norm": 1.91402268409729, "learning_rate": 6.195007143482764e-07, "loss": 0.5364, "step": 12346 }, { "epoch": 0.8692009855684618, "grad_norm": 1.941772222518921, "learning_rate": 6.188444013395454e-07, "loss": 0.7203, "step": 12347 }, { "epoch": 0.869271383315734, "grad_norm": 1.9245855808258057, "learning_rate": 6.181884200928737e-07, "loss": 0.6009, "step": 12348 }, { "epoch": 0.869341781063006, "grad_norm": 2.39880108833313, "learning_rate": 6.175327706423662e-07, "loss": 0.7607, "step": 12349 }, { "epoch": 0.8694121788102781, "grad_norm": 2.0807368755340576, "learning_rate": 6.168774530221098e-07, "loss": 0.7003, "step": 12350 }, { "epoch": 0.8694825765575501, "grad_norm": 1.5776948928833008, "learning_rate": 6.162224672661743e-07, "loss": 0.6656, "step": 12351 }, { "epoch": 0.8695529743048223, "grad_norm": 1.6801567077636719, "learning_rate": 6.155678134086152e-07, "loss": 0.5676, "step": 12352 }, { "epoch": 0.8696233720520943, "grad_norm": 1.8275877237319946, "learning_rate": 6.149134914834668e-07, "loss": 0.5115, "step": 12353 }, { "epoch": 0.8696937697993664, "grad_norm": 2.2930023670196533, "learning_rate": 6.142595015247504e-07, "loss": 0.5123, "step": 12354 }, { "epoch": 0.8697641675466385, "grad_norm": 2.5919103622436523, "learning_rate": 6.136058435664644e-07, "loss": 0.6281, "step": 12355 }, { "epoch": 0.8698345652939106, "grad_norm": 2.013655662536621, "learning_rate": 6.129525176425954e-07, "loss": 0.6439, "step": 12356 }, { "epoch": 0.8699049630411827, "grad_norm": 1.983436107635498, "learning_rate": 6.122995237871081e-07, "loss": 0.5693, "step": 12357 }, { "epoch": 0.8699753607884547, "grad_norm": 1.9990464448928833, "learning_rate": 6.116468620339552e-07, "loss": 0.6091, "step": 12358 }, { "epoch": 0.8700457585357269, "grad_norm": 1.92135751247406, "learning_rate": 6.109945324170654e-07, "loss": 0.783, "step": 12359 }, { "epoch": 0.870116156282999, "grad_norm": 2.202084541320801, "learning_rate": 6.103425349703578e-07, "loss": 0.634, "step": 12360 }, { "epoch": 0.870186554030271, "grad_norm": 1.8450795412063599, "learning_rate": 6.09690869727729e-07, "loss": 0.6587, "step": 12361 }, { "epoch": 0.8702569517775431, "grad_norm": 2.2180373668670654, "learning_rate": 6.090395367230589e-07, "loss": 0.5746, "step": 12362 }, { "epoch": 0.8703273495248152, "grad_norm": 2.175124406814575, "learning_rate": 6.0838853599021e-07, "loss": 0.666, "step": 12363 }, { "epoch": 0.8703977472720873, "grad_norm": 1.9541453123092651, "learning_rate": 6.077378675630311e-07, "loss": 0.5614, "step": 12364 }, { "epoch": 0.8704681450193594, "grad_norm": 1.8550477027893066, "learning_rate": 6.070875314753481e-07, "loss": 0.7179, "step": 12365 }, { "epoch": 0.8705385427666315, "grad_norm": 1.7106709480285645, "learning_rate": 6.064375277609757e-07, "loss": 0.5462, "step": 12366 }, { "epoch": 0.8706089405139036, "grad_norm": 2.519601821899414, "learning_rate": 6.057878564537063e-07, "loss": 0.6386, "step": 12367 }, { "epoch": 0.8706793382611756, "grad_norm": 1.831817865371704, "learning_rate": 6.051385175873173e-07, "loss": 0.7393, "step": 12368 }, { "epoch": 0.8707497360084477, "grad_norm": 1.8567488193511963, "learning_rate": 6.044895111955677e-07, "loss": 0.6745, "step": 12369 }, { "epoch": 0.8708201337557199, "grad_norm": 1.7132319211959839, "learning_rate": 6.038408373122008e-07, "loss": 0.6774, "step": 12370 }, { "epoch": 0.8708905315029919, "grad_norm": 2.3973114490509033, "learning_rate": 6.031924959709406e-07, "loss": 0.5934, "step": 12371 }, { "epoch": 0.870960929250264, "grad_norm": 1.7032952308654785, "learning_rate": 6.025444872054969e-07, "loss": 0.6769, "step": 12372 }, { "epoch": 0.871031326997536, "grad_norm": 2.3492016792297363, "learning_rate": 6.018968110495587e-07, "loss": 0.677, "step": 12373 }, { "epoch": 0.8711017247448082, "grad_norm": 1.8388519287109375, "learning_rate": 6.012494675368009e-07, "loss": 0.6226, "step": 12374 }, { "epoch": 0.8711721224920802, "grad_norm": 1.8002142906188965, "learning_rate": 6.006024567008768e-07, "loss": 0.5597, "step": 12375 }, { "epoch": 0.8712425202393523, "grad_norm": 2.0324923992156982, "learning_rate": 5.999557785754279e-07, "loss": 0.7168, "step": 12376 }, { "epoch": 0.8713129179866245, "grad_norm": 2.35260272026062, "learning_rate": 5.993094331940732e-07, "loss": 0.7353, "step": 12377 }, { "epoch": 0.8713833157338965, "grad_norm": 1.69230318069458, "learning_rate": 5.986634205904183e-07, "loss": 0.6423, "step": 12378 }, { "epoch": 0.8714537134811686, "grad_norm": 1.7984284162521362, "learning_rate": 5.98017740798049e-07, "loss": 0.6651, "step": 12379 }, { "epoch": 0.8715241112284406, "grad_norm": 2.5513641834259033, "learning_rate": 5.973723938505377e-07, "loss": 0.6131, "step": 12380 }, { "epoch": 0.8715945089757128, "grad_norm": 2.3677849769592285, "learning_rate": 5.967273797814308e-07, "loss": 0.7608, "step": 12381 }, { "epoch": 0.8716649067229849, "grad_norm": 2.2552201747894287, "learning_rate": 5.960826986242692e-07, "loss": 0.73, "step": 12382 }, { "epoch": 0.8717353044702569, "grad_norm": 2.024350643157959, "learning_rate": 5.95438350412566e-07, "loss": 0.6776, "step": 12383 }, { "epoch": 0.8718057022175291, "grad_norm": 2.015204668045044, "learning_rate": 5.947943351798244e-07, "loss": 0.5926, "step": 12384 }, { "epoch": 0.8718760999648011, "grad_norm": 2.203486204147339, "learning_rate": 5.941506529595251e-07, "loss": 0.6735, "step": 12385 }, { "epoch": 0.8719464977120732, "grad_norm": 1.7439419031143188, "learning_rate": 5.93507303785137e-07, "loss": 0.6527, "step": 12386 }, { "epoch": 0.8720168954593454, "grad_norm": 1.9150508642196655, "learning_rate": 5.928642876901044e-07, "loss": 0.6283, "step": 12387 }, { "epoch": 0.8720872932066174, "grad_norm": 1.9373284578323364, "learning_rate": 5.922216047078606e-07, "loss": 0.6548, "step": 12388 }, { "epoch": 0.8721576909538895, "grad_norm": 2.159534454345703, "learning_rate": 5.915792548718184e-07, "loss": 0.6496, "step": 12389 }, { "epoch": 0.8722280887011615, "grad_norm": 1.8774043321609497, "learning_rate": 5.909372382153755e-07, "loss": 0.7095, "step": 12390 }, { "epoch": 0.8722984864484337, "grad_norm": 1.7596157789230347, "learning_rate": 5.902955547719083e-07, "loss": 0.567, "step": 12391 }, { "epoch": 0.8723688841957057, "grad_norm": 1.8083730936050415, "learning_rate": 5.896542045747825e-07, "loss": 0.6833, "step": 12392 }, { "epoch": 0.8724392819429778, "grad_norm": 1.9431865215301514, "learning_rate": 5.890131876573382e-07, "loss": 0.6908, "step": 12393 }, { "epoch": 0.87250967969025, "grad_norm": 2.0456631183624268, "learning_rate": 5.883725040529059e-07, "loss": 0.6424, "step": 12394 }, { "epoch": 0.872580077437522, "grad_norm": 1.7156262397766113, "learning_rate": 5.877321537947924e-07, "loss": 0.6213, "step": 12395 }, { "epoch": 0.8726504751847941, "grad_norm": 1.801667332649231, "learning_rate": 5.870921369162908e-07, "loss": 0.5569, "step": 12396 }, { "epoch": 0.8727208729320661, "grad_norm": 2.290043830871582, "learning_rate": 5.86452453450679e-07, "loss": 0.6165, "step": 12397 }, { "epoch": 0.8727912706793383, "grad_norm": 1.9641053676605225, "learning_rate": 5.858131034312125e-07, "loss": 0.5659, "step": 12398 }, { "epoch": 0.8728616684266104, "grad_norm": 1.9753097295761108, "learning_rate": 5.851740868911315e-07, "loss": 0.759, "step": 12399 }, { "epoch": 0.8729320661738824, "grad_norm": 1.4204447269439697, "learning_rate": 5.845354038636582e-07, "loss": 0.7223, "step": 12400 }, { "epoch": 0.8730024639211545, "grad_norm": 2.232449531555176, "learning_rate": 5.838970543820003e-07, "loss": 0.6494, "step": 12401 }, { "epoch": 0.8730728616684266, "grad_norm": 1.7102608680725098, "learning_rate": 5.832590384793446e-07, "loss": 0.6349, "step": 12402 }, { "epoch": 0.8731432594156987, "grad_norm": 1.7840009927749634, "learning_rate": 5.826213561888641e-07, "loss": 0.6526, "step": 12403 }, { "epoch": 0.8732136571629708, "grad_norm": 1.9471572637557983, "learning_rate": 5.819840075437108e-07, "loss": 0.6155, "step": 12404 }, { "epoch": 0.8732840549102429, "grad_norm": 1.906006097793579, "learning_rate": 5.813469925770219e-07, "loss": 0.6339, "step": 12405 }, { "epoch": 0.873354452657515, "grad_norm": 2.3006038665771484, "learning_rate": 5.807103113219141e-07, "loss": 0.6731, "step": 12406 }, { "epoch": 0.873424850404787, "grad_norm": 1.7832194566726685, "learning_rate": 5.800739638114921e-07, "loss": 0.7547, "step": 12407 }, { "epoch": 0.8734952481520591, "grad_norm": 1.7783010005950928, "learning_rate": 5.794379500788387e-07, "loss": 0.6103, "step": 12408 }, { "epoch": 0.8735656458993312, "grad_norm": 2.200235605239868, "learning_rate": 5.78802270157021e-07, "loss": 0.658, "step": 12409 }, { "epoch": 0.8736360436466033, "grad_norm": 1.9313105344772339, "learning_rate": 5.7816692407909e-07, "loss": 0.6821, "step": 12410 }, { "epoch": 0.8737064413938754, "grad_norm": 1.9369288682937622, "learning_rate": 5.775319118780755e-07, "loss": 0.6289, "step": 12411 }, { "epoch": 0.8737768391411475, "grad_norm": 1.803001046180725, "learning_rate": 5.768972335869927e-07, "loss": 0.6339, "step": 12412 }, { "epoch": 0.8738472368884196, "grad_norm": 2.151129961013794, "learning_rate": 5.762628892388405e-07, "loss": 0.6873, "step": 12413 }, { "epoch": 0.8739176346356916, "grad_norm": 1.7585601806640625, "learning_rate": 5.756288788665982e-07, "loss": 0.6773, "step": 12414 }, { "epoch": 0.8739880323829637, "grad_norm": 2.863351583480835, "learning_rate": 5.749952025032289e-07, "loss": 0.6264, "step": 12415 }, { "epoch": 0.8740584301302359, "grad_norm": 2.662703275680542, "learning_rate": 5.743618601816786e-07, "loss": 0.6624, "step": 12416 }, { "epoch": 0.8741288278775079, "grad_norm": 1.9904201030731201, "learning_rate": 5.737288519348746e-07, "loss": 0.6606, "step": 12417 }, { "epoch": 0.87419922562478, "grad_norm": 1.8840415477752686, "learning_rate": 5.730961777957255e-07, "loss": 0.5538, "step": 12418 }, { "epoch": 0.874269623372052, "grad_norm": 2.1769895553588867, "learning_rate": 5.724638377971285e-07, "loss": 0.7278, "step": 12419 }, { "epoch": 0.8743400211193242, "grad_norm": 1.707557201385498, "learning_rate": 5.718318319719564e-07, "loss": 0.5268, "step": 12420 }, { "epoch": 0.8744104188665963, "grad_norm": 1.8136482238769531, "learning_rate": 5.712001603530705e-07, "loss": 0.6198, "step": 12421 }, { "epoch": 0.8744808166138683, "grad_norm": 1.9212180376052856, "learning_rate": 5.705688229733104e-07, "loss": 0.5136, "step": 12422 }, { "epoch": 0.8745512143611405, "grad_norm": 1.9870349168777466, "learning_rate": 5.699378198654999e-07, "loss": 0.6271, "step": 12423 }, { "epoch": 0.8746216121084125, "grad_norm": 2.0328502655029297, "learning_rate": 5.693071510624443e-07, "loss": 0.6022, "step": 12424 }, { "epoch": 0.8746920098556846, "grad_norm": 1.9485429525375366, "learning_rate": 5.686768165969352e-07, "loss": 0.584, "step": 12425 }, { "epoch": 0.8747624076029568, "grad_norm": 2.210902452468872, "learning_rate": 5.680468165017418e-07, "loss": 0.6783, "step": 12426 }, { "epoch": 0.8748328053502288, "grad_norm": 2.555391550064087, "learning_rate": 5.674171508096199e-07, "loss": 0.6397, "step": 12427 }, { "epoch": 0.8749032030975009, "grad_norm": 2.215306043624878, "learning_rate": 5.667878195533072e-07, "loss": 0.6291, "step": 12428 }, { "epoch": 0.8749736008447729, "grad_norm": 1.6926283836364746, "learning_rate": 5.66158822765522e-07, "loss": 0.6467, "step": 12429 }, { "epoch": 0.8750439985920451, "grad_norm": 2.199791193008423, "learning_rate": 5.655301604789646e-07, "loss": 0.5689, "step": 12430 }, { "epoch": 0.8751143963393171, "grad_norm": 2.328932285308838, "learning_rate": 5.649018327263229e-07, "loss": 0.7356, "step": 12431 }, { "epoch": 0.8751847940865892, "grad_norm": 1.7139474153518677, "learning_rate": 5.642738395402617e-07, "loss": 0.5869, "step": 12432 }, { "epoch": 0.8752551918338614, "grad_norm": 1.9066331386566162, "learning_rate": 5.636461809534331e-07, "loss": 0.4834, "step": 12433 }, { "epoch": 0.8753255895811334, "grad_norm": 1.7696985006332397, "learning_rate": 5.630188569984692e-07, "loss": 0.6874, "step": 12434 }, { "epoch": 0.8753959873284055, "grad_norm": 1.8335795402526855, "learning_rate": 5.623918677079841e-07, "loss": 0.589, "step": 12435 }, { "epoch": 0.8754663850756775, "grad_norm": 2.240257501602173, "learning_rate": 5.617652131145756e-07, "loss": 0.6691, "step": 12436 }, { "epoch": 0.8755367828229497, "grad_norm": 2.310007333755493, "learning_rate": 5.611388932508253e-07, "loss": 0.6309, "step": 12437 }, { "epoch": 0.8756071805702218, "grad_norm": 1.7370781898498535, "learning_rate": 5.605129081492935e-07, "loss": 0.5741, "step": 12438 }, { "epoch": 0.8756775783174938, "grad_norm": 1.9046759605407715, "learning_rate": 5.598872578425291e-07, "loss": 0.5974, "step": 12439 }, { "epoch": 0.875747976064766, "grad_norm": 1.7231807708740234, "learning_rate": 5.592619423630594e-07, "loss": 0.5822, "step": 12440 }, { "epoch": 0.875818373812038, "grad_norm": 2.024609327316284, "learning_rate": 5.586369617433933e-07, "loss": 0.5909, "step": 12441 }, { "epoch": 0.8758887715593101, "grad_norm": 2.0159311294555664, "learning_rate": 5.580123160160249e-07, "loss": 0.6831, "step": 12442 }, { "epoch": 0.8759591693065822, "grad_norm": 1.6980421543121338, "learning_rate": 5.573880052134311e-07, "loss": 0.5514, "step": 12443 }, { "epoch": 0.8760295670538543, "grad_norm": 2.2040481567382812, "learning_rate": 5.567640293680686e-07, "loss": 0.7179, "step": 12444 }, { "epoch": 0.8760999648011264, "grad_norm": 2.4337949752807617, "learning_rate": 5.561403885123803e-07, "loss": 0.7542, "step": 12445 }, { "epoch": 0.8761703625483984, "grad_norm": 1.8271018266677856, "learning_rate": 5.555170826787902e-07, "loss": 0.6199, "step": 12446 }, { "epoch": 0.8762407602956706, "grad_norm": 2.082618474960327, "learning_rate": 5.548941118997008e-07, "loss": 0.622, "step": 12447 }, { "epoch": 0.8763111580429426, "grad_norm": 1.8145813941955566, "learning_rate": 5.542714762075057e-07, "loss": 0.6685, "step": 12448 }, { "epoch": 0.8763815557902147, "grad_norm": 2.1360180377960205, "learning_rate": 5.536491756345726e-07, "loss": 0.5714, "step": 12449 }, { "epoch": 0.8764519535374868, "grad_norm": 1.9111604690551758, "learning_rate": 5.530272102132584e-07, "loss": 0.6015, "step": 12450 }, { "epoch": 0.8765223512847589, "grad_norm": 2.142665147781372, "learning_rate": 5.52405579975899e-07, "loss": 0.5533, "step": 12451 }, { "epoch": 0.876592749032031, "grad_norm": 2.005434513092041, "learning_rate": 5.517842849548117e-07, "loss": 0.5728, "step": 12452 }, { "epoch": 0.876663146779303, "grad_norm": 1.7838457822799683, "learning_rate": 5.511633251822982e-07, "loss": 0.6009, "step": 12453 }, { "epoch": 0.8767335445265751, "grad_norm": 1.8212769031524658, "learning_rate": 5.505427006906453e-07, "loss": 0.6253, "step": 12454 }, { "epoch": 0.8768039422738473, "grad_norm": 2.0761964321136475, "learning_rate": 5.49922411512117e-07, "loss": 0.6613, "step": 12455 }, { "epoch": 0.8768743400211193, "grad_norm": 2.0871469974517822, "learning_rate": 5.493024576789657e-07, "loss": 0.6646, "step": 12456 }, { "epoch": 0.8769447377683914, "grad_norm": 1.8835831880569458, "learning_rate": 5.486828392234217e-07, "loss": 0.656, "step": 12457 }, { "epoch": 0.8770151355156635, "grad_norm": 2.1265852451324463, "learning_rate": 5.480635561776991e-07, "loss": 0.6879, "step": 12458 }, { "epoch": 0.8770855332629356, "grad_norm": 1.6158206462860107, "learning_rate": 5.474446085739945e-07, "loss": 0.7158, "step": 12459 }, { "epoch": 0.8771559310102077, "grad_norm": 1.847436785697937, "learning_rate": 5.468259964444892e-07, "loss": 0.5653, "step": 12460 }, { "epoch": 0.8772263287574797, "grad_norm": 1.9006954431533813, "learning_rate": 5.462077198213441e-07, "loss": 0.5939, "step": 12461 }, { "epoch": 0.8772967265047519, "grad_norm": 1.6521570682525635, "learning_rate": 5.455897787367051e-07, "loss": 0.5965, "step": 12462 }, { "epoch": 0.8773671242520239, "grad_norm": 2.0698721408843994, "learning_rate": 5.449721732226995e-07, "loss": 0.5513, "step": 12463 }, { "epoch": 0.877437521999296, "grad_norm": 1.7844667434692383, "learning_rate": 5.443549033114365e-07, "loss": 0.6949, "step": 12464 }, { "epoch": 0.877507919746568, "grad_norm": 2.3367984294891357, "learning_rate": 5.437379690350077e-07, "loss": 0.7059, "step": 12465 }, { "epoch": 0.8775783174938402, "grad_norm": 1.9479585886001587, "learning_rate": 5.431213704254897e-07, "loss": 0.7052, "step": 12466 }, { "epoch": 0.8776487152411123, "grad_norm": 1.9008338451385498, "learning_rate": 5.425051075149385e-07, "loss": 0.5676, "step": 12467 }, { "epoch": 0.8777191129883843, "grad_norm": 2.0254533290863037, "learning_rate": 5.418891803353963e-07, "loss": 0.7215, "step": 12468 }, { "epoch": 0.8777895107356565, "grad_norm": 2.3364129066467285, "learning_rate": 5.412735889188843e-07, "loss": 0.7401, "step": 12469 }, { "epoch": 0.8778599084829285, "grad_norm": 1.8128169775009155, "learning_rate": 5.406583332974073e-07, "loss": 0.6386, "step": 12470 }, { "epoch": 0.8779303062302006, "grad_norm": 2.2829535007476807, "learning_rate": 5.400434135029531e-07, "loss": 0.5878, "step": 12471 }, { "epoch": 0.8780007039774728, "grad_norm": 2.1493473052978516, "learning_rate": 5.39428829567493e-07, "loss": 0.5299, "step": 12472 }, { "epoch": 0.8780711017247448, "grad_norm": 2.1487510204315186, "learning_rate": 5.388145815229782e-07, "loss": 0.6902, "step": 12473 }, { "epoch": 0.8781414994720169, "grad_norm": 2.0615949630737305, "learning_rate": 5.382006694013454e-07, "loss": 0.6149, "step": 12474 }, { "epoch": 0.8782118972192889, "grad_norm": 2.0165295600891113, "learning_rate": 5.37587093234513e-07, "loss": 0.5507, "step": 12475 }, { "epoch": 0.8782822949665611, "grad_norm": 1.7045530080795288, "learning_rate": 5.369738530543792e-07, "loss": 0.5077, "step": 12476 }, { "epoch": 0.8783526927138332, "grad_norm": 1.7953816652297974, "learning_rate": 5.363609488928281e-07, "loss": 0.6033, "step": 12477 }, { "epoch": 0.8784230904611052, "grad_norm": 1.8475604057312012, "learning_rate": 5.357483807817249e-07, "loss": 0.6679, "step": 12478 }, { "epoch": 0.8784934882083774, "grad_norm": 1.8167989253997803, "learning_rate": 5.351361487529179e-07, "loss": 0.6268, "step": 12479 }, { "epoch": 0.8785638859556494, "grad_norm": 1.7776645421981812, "learning_rate": 5.345242528382375e-07, "loss": 0.5207, "step": 12480 }, { "epoch": 0.8786342837029215, "grad_norm": 1.9120320081710815, "learning_rate": 5.33912693069496e-07, "loss": 0.6566, "step": 12481 }, { "epoch": 0.8787046814501936, "grad_norm": 1.9108085632324219, "learning_rate": 5.333014694784922e-07, "loss": 0.6423, "step": 12482 }, { "epoch": 0.8787750791974657, "grad_norm": 1.7291969060897827, "learning_rate": 5.326905820969988e-07, "loss": 0.7108, "step": 12483 }, { "epoch": 0.8788454769447378, "grad_norm": 1.8108457326889038, "learning_rate": 5.320800309567809e-07, "loss": 0.692, "step": 12484 }, { "epoch": 0.8789158746920098, "grad_norm": 1.7530781030654907, "learning_rate": 5.314698160895786e-07, "loss": 0.6408, "step": 12485 }, { "epoch": 0.878986272439282, "grad_norm": 2.7339794635772705, "learning_rate": 5.308599375271193e-07, "loss": 0.6382, "step": 12486 }, { "epoch": 0.879056670186554, "grad_norm": 2.2469475269317627, "learning_rate": 5.302503953011101e-07, "loss": 0.5786, "step": 12487 }, { "epoch": 0.8791270679338261, "grad_norm": 1.9358872175216675, "learning_rate": 5.296411894432443e-07, "loss": 0.6362, "step": 12488 }, { "epoch": 0.8791974656810982, "grad_norm": 2.5186924934387207, "learning_rate": 5.29032319985191e-07, "loss": 0.6177, "step": 12489 }, { "epoch": 0.8792678634283703, "grad_norm": 1.8338857889175415, "learning_rate": 5.284237869586082e-07, "loss": 0.5527, "step": 12490 }, { "epoch": 0.8793382611756424, "grad_norm": 2.0719635486602783, "learning_rate": 5.278155903951335e-07, "loss": 0.6138, "step": 12491 }, { "epoch": 0.8794086589229144, "grad_norm": 2.0458948612213135, "learning_rate": 5.272077303263884e-07, "loss": 0.6184, "step": 12492 }, { "epoch": 0.8794790566701866, "grad_norm": 2.2301459312438965, "learning_rate": 5.266002067839744e-07, "loss": 0.5578, "step": 12493 }, { "epoch": 0.8795494544174587, "grad_norm": 1.4917396306991577, "learning_rate": 5.259930197994805e-07, "loss": 0.6909, "step": 12494 }, { "epoch": 0.8796198521647307, "grad_norm": 2.2213997840881348, "learning_rate": 5.253861694044711e-07, "loss": 0.7882, "step": 12495 }, { "epoch": 0.8796902499120028, "grad_norm": 2.350996255874634, "learning_rate": 5.247796556304979e-07, "loss": 0.6377, "step": 12496 }, { "epoch": 0.8797606476592749, "grad_norm": 1.7027032375335693, "learning_rate": 5.241734785090963e-07, "loss": 0.6019, "step": 12497 }, { "epoch": 0.879831045406547, "grad_norm": 2.1899254322052, "learning_rate": 5.235676380717783e-07, "loss": 0.6112, "step": 12498 }, { "epoch": 0.8799014431538191, "grad_norm": 2.197096347808838, "learning_rate": 5.229621343500461e-07, "loss": 0.5626, "step": 12499 }, { "epoch": 0.8799718409010912, "grad_norm": 1.9483237266540527, "learning_rate": 5.223569673753784e-07, "loss": 0.6702, "step": 12500 }, { "epoch": 0.8800422386483633, "grad_norm": 1.7283504009246826, "learning_rate": 5.217521371792383e-07, "loss": 0.5245, "step": 12501 }, { "epoch": 0.8801126363956353, "grad_norm": 2.3502001762390137, "learning_rate": 5.211476437930711e-07, "loss": 0.5854, "step": 12502 }, { "epoch": 0.8801830341429074, "grad_norm": 1.7420787811279297, "learning_rate": 5.205434872483058e-07, "loss": 0.5895, "step": 12503 }, { "epoch": 0.8802534318901795, "grad_norm": 1.764638900756836, "learning_rate": 5.199396675763519e-07, "loss": 0.6983, "step": 12504 }, { "epoch": 0.8803238296374516, "grad_norm": 2.60969614982605, "learning_rate": 5.19336184808605e-07, "loss": 0.6494, "step": 12505 }, { "epoch": 0.8803942273847237, "grad_norm": 2.0727624893188477, "learning_rate": 5.187330389764388e-07, "loss": 0.7484, "step": 12506 }, { "epoch": 0.8804646251319957, "grad_norm": 2.236168384552002, "learning_rate": 5.181302301112118e-07, "loss": 0.5733, "step": 12507 }, { "epoch": 0.8805350228792679, "grad_norm": 1.9175970554351807, "learning_rate": 5.175277582442624e-07, "loss": 0.7172, "step": 12508 }, { "epoch": 0.8806054206265399, "grad_norm": 2.018249273300171, "learning_rate": 5.169256234069173e-07, "loss": 0.6834, "step": 12509 }, { "epoch": 0.880675818373812, "grad_norm": 2.1356425285339355, "learning_rate": 5.163238256304796e-07, "loss": 0.6243, "step": 12510 }, { "epoch": 0.8807462161210842, "grad_norm": 1.8183988332748413, "learning_rate": 5.157223649462398e-07, "loss": 0.6033, "step": 12511 }, { "epoch": 0.8808166138683562, "grad_norm": 2.1137032508850098, "learning_rate": 5.15121241385466e-07, "loss": 0.6445, "step": 12512 }, { "epoch": 0.8808870116156283, "grad_norm": 1.846226453781128, "learning_rate": 5.145204549794123e-07, "loss": 0.6165, "step": 12513 }, { "epoch": 0.8809574093629003, "grad_norm": 1.7362251281738281, "learning_rate": 5.139200057593127e-07, "loss": 0.5688, "step": 12514 }, { "epoch": 0.8810278071101725, "grad_norm": 2.1299259662628174, "learning_rate": 5.133198937563863e-07, "loss": 0.6372, "step": 12515 }, { "epoch": 0.8810982048574446, "grad_norm": 1.8625078201293945, "learning_rate": 5.127201190018335e-07, "loss": 0.6451, "step": 12516 }, { "epoch": 0.8811686026047166, "grad_norm": 3.2149338722229004, "learning_rate": 5.121206815268371e-07, "loss": 0.73, "step": 12517 }, { "epoch": 0.8812390003519888, "grad_norm": 1.9278578758239746, "learning_rate": 5.115215813625624e-07, "loss": 0.6175, "step": 12518 }, { "epoch": 0.8813093980992608, "grad_norm": 2.0707976818084717, "learning_rate": 5.109228185401572e-07, "loss": 0.6079, "step": 12519 }, { "epoch": 0.8813797958465329, "grad_norm": 1.9656046628952026, "learning_rate": 5.103243930907503e-07, "loss": 0.6668, "step": 12520 }, { "epoch": 0.881450193593805, "grad_norm": 2.0888001918792725, "learning_rate": 5.09726305045457e-07, "loss": 0.6767, "step": 12521 }, { "epoch": 0.8815205913410771, "grad_norm": 2.4927759170532227, "learning_rate": 5.091285544353703e-07, "loss": 0.718, "step": 12522 }, { "epoch": 0.8815909890883492, "grad_norm": 1.7889806032180786, "learning_rate": 5.085311412915687e-07, "loss": 0.6844, "step": 12523 }, { "epoch": 0.8816613868356212, "grad_norm": 1.725429654121399, "learning_rate": 5.079340656451128e-07, "loss": 0.5482, "step": 12524 }, { "epoch": 0.8817317845828934, "grad_norm": 2.248323678970337, "learning_rate": 5.07337327527044e-07, "loss": 0.7097, "step": 12525 }, { "epoch": 0.8818021823301654, "grad_norm": 2.371589183807373, "learning_rate": 5.067409269683861e-07, "loss": 0.5871, "step": 12526 }, { "epoch": 0.8818725800774375, "grad_norm": 1.9698947668075562, "learning_rate": 5.061448640001499e-07, "loss": 0.645, "step": 12527 }, { "epoch": 0.8819429778247097, "grad_norm": 1.9110363721847534, "learning_rate": 5.055491386533224e-07, "loss": 0.691, "step": 12528 }, { "epoch": 0.8820133755719817, "grad_norm": 1.8444054126739502, "learning_rate": 5.049537509588771e-07, "loss": 0.6373, "step": 12529 }, { "epoch": 0.8820837733192538, "grad_norm": 2.128666639328003, "learning_rate": 5.043587009477696e-07, "loss": 0.6567, "step": 12530 }, { "epoch": 0.8821541710665258, "grad_norm": 1.8344435691833496, "learning_rate": 5.037639886509354e-07, "loss": 0.5835, "step": 12531 }, { "epoch": 0.882224568813798, "grad_norm": 1.9073823690414429, "learning_rate": 5.031696140992938e-07, "loss": 0.5663, "step": 12532 }, { "epoch": 0.8822949665610701, "grad_norm": 2.0453548431396484, "learning_rate": 5.025755773237489e-07, "loss": 0.6041, "step": 12533 }, { "epoch": 0.8823653643083421, "grad_norm": 2.1344330310821533, "learning_rate": 5.019818783551828e-07, "loss": 0.6739, "step": 12534 }, { "epoch": 0.8824357620556142, "grad_norm": 2.2291245460510254, "learning_rate": 5.013885172244657e-07, "loss": 0.5836, "step": 12535 }, { "epoch": 0.8825061598028863, "grad_norm": 1.75200617313385, "learning_rate": 5.007954939624443e-07, "loss": 0.6428, "step": 12536 }, { "epoch": 0.8825765575501584, "grad_norm": 1.747405767440796, "learning_rate": 5.002028085999515e-07, "loss": 0.5865, "step": 12537 }, { "epoch": 0.8826469552974305, "grad_norm": 1.766391396522522, "learning_rate": 4.996104611678003e-07, "loss": 0.6186, "step": 12538 }, { "epoch": 0.8827173530447026, "grad_norm": 1.986774206161499, "learning_rate": 4.99018451696789e-07, "loss": 0.6582, "step": 12539 }, { "epoch": 0.8827877507919747, "grad_norm": 1.932236909866333, "learning_rate": 4.984267802176956e-07, "loss": 0.6017, "step": 12540 }, { "epoch": 0.8828581485392467, "grad_norm": 1.9888869524002075, "learning_rate": 4.978354467612822e-07, "loss": 0.6197, "step": 12541 }, { "epoch": 0.8829285462865188, "grad_norm": 2.3829824924468994, "learning_rate": 4.972444513582936e-07, "loss": 0.7048, "step": 12542 }, { "epoch": 0.8829989440337909, "grad_norm": 2.0170528888702393, "learning_rate": 4.966537940394543e-07, "loss": 0.6764, "step": 12543 }, { "epoch": 0.883069341781063, "grad_norm": 2.070763349533081, "learning_rate": 4.960634748354734e-07, "loss": 0.7168, "step": 12544 }, { "epoch": 0.8831397395283351, "grad_norm": 1.8647924661636353, "learning_rate": 4.954734937770439e-07, "loss": 0.6324, "step": 12545 }, { "epoch": 0.8832101372756072, "grad_norm": 2.0136897563934326, "learning_rate": 4.948838508948372e-07, "loss": 0.6489, "step": 12546 }, { "epoch": 0.8832805350228793, "grad_norm": 1.8455528020858765, "learning_rate": 4.94294546219512e-07, "loss": 0.7315, "step": 12547 }, { "epoch": 0.8833509327701513, "grad_norm": 1.8983827829360962, "learning_rate": 4.937055797817045e-07, "loss": 0.7392, "step": 12548 }, { "epoch": 0.8834213305174234, "grad_norm": 4.205691814422607, "learning_rate": 4.931169516120355e-07, "loss": 0.7103, "step": 12549 }, { "epoch": 0.8834917282646956, "grad_norm": 2.2361857891082764, "learning_rate": 4.925286617411103e-07, "loss": 0.5571, "step": 12550 }, { "epoch": 0.8835621260119676, "grad_norm": 1.7480322122573853, "learning_rate": 4.919407101995128e-07, "loss": 0.6179, "step": 12551 }, { "epoch": 0.8836325237592397, "grad_norm": 1.9113088846206665, "learning_rate": 4.913530970178127e-07, "loss": 0.6369, "step": 12552 }, { "epoch": 0.8837029215065118, "grad_norm": 1.9788504838943481, "learning_rate": 4.907658222265592e-07, "loss": 0.6537, "step": 12553 }, { "epoch": 0.8837733192537839, "grad_norm": 2.1421337127685547, "learning_rate": 4.901788858562859e-07, "loss": 0.6859, "step": 12554 }, { "epoch": 0.883843717001056, "grad_norm": 2.737111806869507, "learning_rate": 4.895922879375071e-07, "loss": 0.6934, "step": 12555 }, { "epoch": 0.883914114748328, "grad_norm": 1.8041478395462036, "learning_rate": 4.890060285007223e-07, "loss": 0.5781, "step": 12556 }, { "epoch": 0.8839845124956002, "grad_norm": 2.1074395179748535, "learning_rate": 4.8842010757641e-07, "loss": 0.6392, "step": 12557 }, { "epoch": 0.8840549102428722, "grad_norm": 1.9360331296920776, "learning_rate": 4.878345251950346e-07, "loss": 0.6259, "step": 12558 }, { "epoch": 0.8841253079901443, "grad_norm": 1.9327516555786133, "learning_rate": 4.8724928138704e-07, "loss": 0.5636, "step": 12559 }, { "epoch": 0.8841957057374163, "grad_norm": 1.8306328058242798, "learning_rate": 4.86664376182853e-07, "loss": 0.7617, "step": 12560 }, { "epoch": 0.8842661034846885, "grad_norm": 2.0171685218811035, "learning_rate": 4.860798096128831e-07, "loss": 0.6347, "step": 12561 }, { "epoch": 0.8843365012319606, "grad_norm": 2.2314300537109375, "learning_rate": 4.854955817075242e-07, "loss": 0.5936, "step": 12562 }, { "epoch": 0.8844068989792326, "grad_norm": 1.6845163106918335, "learning_rate": 4.849116924971489e-07, "loss": 0.6114, "step": 12563 }, { "epoch": 0.8844772967265048, "grad_norm": 2.020432949066162, "learning_rate": 4.84328142012117e-07, "loss": 0.6631, "step": 12564 }, { "epoch": 0.8845476944737768, "grad_norm": 2.068474531173706, "learning_rate": 4.837449302827647e-07, "loss": 0.5934, "step": 12565 }, { "epoch": 0.8846180922210489, "grad_norm": 1.97312331199646, "learning_rate": 4.831620573394159e-07, "loss": 0.6651, "step": 12566 }, { "epoch": 0.8846884899683211, "grad_norm": 1.785732388496399, "learning_rate": 4.82579523212372e-07, "loss": 0.5195, "step": 12567 }, { "epoch": 0.8847588877155931, "grad_norm": 2.500920534133911, "learning_rate": 4.819973279319232e-07, "loss": 0.6702, "step": 12568 }, { "epoch": 0.8848292854628652, "grad_norm": 1.9539536237716675, "learning_rate": 4.814154715283344e-07, "loss": 0.595, "step": 12569 }, { "epoch": 0.8848996832101372, "grad_norm": 2.789226770401001, "learning_rate": 4.808339540318603e-07, "loss": 0.6999, "step": 12570 }, { "epoch": 0.8849700809574094, "grad_norm": 2.1186294555664062, "learning_rate": 4.802527754727324e-07, "loss": 0.5425, "step": 12571 }, { "epoch": 0.8850404787046815, "grad_norm": 1.9864985942840576, "learning_rate": 4.796719358811678e-07, "loss": 0.791, "step": 12572 }, { "epoch": 0.8851108764519535, "grad_norm": 2.0700480937957764, "learning_rate": 4.790914352873639e-07, "loss": 0.6638, "step": 12573 }, { "epoch": 0.8851812741992257, "grad_norm": 2.1172220706939697, "learning_rate": 4.785112737215023e-07, "loss": 0.6276, "step": 12574 }, { "epoch": 0.8852516719464977, "grad_norm": 2.698059558868408, "learning_rate": 4.779314512137452e-07, "loss": 0.6711, "step": 12575 }, { "epoch": 0.8853220696937698, "grad_norm": 1.9960970878601074, "learning_rate": 4.773519677942393e-07, "loss": 0.6787, "step": 12576 }, { "epoch": 0.8853924674410419, "grad_norm": 1.7608866691589355, "learning_rate": 4.767728234931117e-07, "loss": 0.645, "step": 12577 }, { "epoch": 0.885462865188314, "grad_norm": 2.8511006832122803, "learning_rate": 4.7619401834047336e-07, "loss": 0.6368, "step": 12578 }, { "epoch": 0.8855332629355861, "grad_norm": 3.0386931896209717, "learning_rate": 4.7561555236641505e-07, "loss": 0.7486, "step": 12579 }, { "epoch": 0.8856036606828581, "grad_norm": 2.6089000701904297, "learning_rate": 4.7503742560101427e-07, "loss": 0.7601, "step": 12580 }, { "epoch": 0.8856740584301303, "grad_norm": 2.2246148586273193, "learning_rate": 4.744596380743259e-07, "loss": 0.6228, "step": 12581 }, { "epoch": 0.8857444561774023, "grad_norm": 2.0544445514678955, "learning_rate": 4.7388218981639175e-07, "loss": 0.635, "step": 12582 }, { "epoch": 0.8858148539246744, "grad_norm": 2.0381734371185303, "learning_rate": 4.7330508085723174e-07, "loss": 0.6092, "step": 12583 }, { "epoch": 0.8858852516719465, "grad_norm": 1.9095635414123535, "learning_rate": 4.727283112268535e-07, "loss": 0.6702, "step": 12584 }, { "epoch": 0.8859556494192186, "grad_norm": 2.1992170810699463, "learning_rate": 4.721518809552405e-07, "loss": 0.675, "step": 12585 }, { "epoch": 0.8860260471664907, "grad_norm": 1.7810245752334595, "learning_rate": 4.7157579007236374e-07, "loss": 0.6398, "step": 12586 }, { "epoch": 0.8860964449137627, "grad_norm": 2.144512891769409, "learning_rate": 4.710000386081724e-07, "loss": 0.6211, "step": 12587 }, { "epoch": 0.8861668426610348, "grad_norm": 1.890750527381897, "learning_rate": 4.704246265926042e-07, "loss": 0.532, "step": 12588 }, { "epoch": 0.886237240408307, "grad_norm": 1.8819948434829712, "learning_rate": 4.6984955405557183e-07, "loss": 0.7227, "step": 12589 }, { "epoch": 0.886307638155579, "grad_norm": 1.8352811336517334, "learning_rate": 4.6927482102697714e-07, "loss": 0.6645, "step": 12590 }, { "epoch": 0.8863780359028511, "grad_norm": 1.9750006198883057, "learning_rate": 4.6870042753669714e-07, "loss": 0.6551, "step": 12591 }, { "epoch": 0.8864484336501232, "grad_norm": 1.6565097570419312, "learning_rate": 4.6812637361459797e-07, "loss": 0.7189, "step": 12592 }, { "epoch": 0.8865188313973953, "grad_norm": 2.216400623321533, "learning_rate": 4.6755265929052313e-07, "loss": 0.673, "step": 12593 }, { "epoch": 0.8865892291446674, "grad_norm": 2.2100305557250977, "learning_rate": 4.6697928459430226e-07, "loss": 0.6386, "step": 12594 }, { "epoch": 0.8866596268919394, "grad_norm": 1.5414975881576538, "learning_rate": 4.6640624955574473e-07, "loss": 0.7041, "step": 12595 }, { "epoch": 0.8867300246392116, "grad_norm": 2.4466373920440674, "learning_rate": 4.6583355420464434e-07, "loss": 0.6624, "step": 12596 }, { "epoch": 0.8868004223864836, "grad_norm": 1.8951863050460815, "learning_rate": 4.652611985707747e-07, "loss": 0.6161, "step": 12597 }, { "epoch": 0.8868708201337557, "grad_norm": 2.0504043102264404, "learning_rate": 4.6468918268389325e-07, "loss": 0.7088, "step": 12598 }, { "epoch": 0.8869412178810278, "grad_norm": 2.017246961593628, "learning_rate": 4.6411750657374085e-07, "loss": 0.6243, "step": 12599 }, { "epoch": 0.8870116156282999, "grad_norm": 2.987362861633301, "learning_rate": 4.635461702700367e-07, "loss": 0.6016, "step": 12600 }, { "epoch": 0.887082013375572, "grad_norm": 2.317232608795166, "learning_rate": 4.6297517380248854e-07, "loss": 0.6449, "step": 12601 }, { "epoch": 0.887152411122844, "grad_norm": 2.4976181983947754, "learning_rate": 4.6240451720078126e-07, "loss": 0.7789, "step": 12602 }, { "epoch": 0.8872228088701162, "grad_norm": 1.8676873445510864, "learning_rate": 4.6183420049458433e-07, "loss": 0.5504, "step": 12603 }, { "epoch": 0.8872932066173882, "grad_norm": 2.159871816635132, "learning_rate": 4.6126422371354713e-07, "loss": 0.6574, "step": 12604 }, { "epoch": 0.8873636043646603, "grad_norm": 1.8507325649261475, "learning_rate": 4.606945868873057e-07, "loss": 0.6194, "step": 12605 }, { "epoch": 0.8874340021119325, "grad_norm": 1.921963095664978, "learning_rate": 4.601252900454743e-07, "loss": 0.6975, "step": 12606 }, { "epoch": 0.8875043998592045, "grad_norm": 1.8644044399261475, "learning_rate": 4.5955633321765333e-07, "loss": 0.6165, "step": 12607 }, { "epoch": 0.8875747976064766, "grad_norm": 2.009706974029541, "learning_rate": 4.589877164334213e-07, "loss": 0.8199, "step": 12608 }, { "epoch": 0.8876451953537486, "grad_norm": 2.3021645545959473, "learning_rate": 4.58419439722342e-07, "loss": 0.6711, "step": 12609 }, { "epoch": 0.8877155931010208, "grad_norm": 1.6189368963241577, "learning_rate": 4.5785150311395985e-07, "loss": 0.6955, "step": 12610 }, { "epoch": 0.8877859908482929, "grad_norm": 1.905119776725769, "learning_rate": 4.572839066378036e-07, "loss": 0.5634, "step": 12611 }, { "epoch": 0.8878563885955649, "grad_norm": 2.0477712154388428, "learning_rate": 4.5671665032338114e-07, "loss": 0.6037, "step": 12612 }, { "epoch": 0.8879267863428371, "grad_norm": 2.172132730484009, "learning_rate": 4.5614973420018714e-07, "loss": 0.7269, "step": 12613 }, { "epoch": 0.8879971840901091, "grad_norm": 2.0179283618927, "learning_rate": 4.555831582976951e-07, "loss": 0.625, "step": 12614 }, { "epoch": 0.8880675818373812, "grad_norm": 1.731439471244812, "learning_rate": 4.5501692264536177e-07, "loss": 0.6481, "step": 12615 }, { "epoch": 0.8881379795846532, "grad_norm": 2.0047290325164795, "learning_rate": 4.544510272726249e-07, "loss": 0.6004, "step": 12616 }, { "epoch": 0.8882083773319254, "grad_norm": 1.8155726194381714, "learning_rate": 4.538854722089084e-07, "loss": 0.6468, "step": 12617 }, { "epoch": 0.8882787750791975, "grad_norm": 1.731797456741333, "learning_rate": 4.533202574836137e-07, "loss": 0.5862, "step": 12618 }, { "epoch": 0.8883491728264695, "grad_norm": 1.7664546966552734, "learning_rate": 4.5275538312612815e-07, "loss": 0.6641, "step": 12619 }, { "epoch": 0.8884195705737417, "grad_norm": 2.243551015853882, "learning_rate": 4.521908491658205e-07, "loss": 0.6471, "step": 12620 }, { "epoch": 0.8884899683210137, "grad_norm": 2.0624167919158936, "learning_rate": 4.5162665563204007e-07, "loss": 0.6051, "step": 12621 }, { "epoch": 0.8885603660682858, "grad_norm": 2.009974241256714, "learning_rate": 4.5106280255411976e-07, "loss": 0.7002, "step": 12622 }, { "epoch": 0.888630763815558, "grad_norm": 1.9113819599151611, "learning_rate": 4.5049928996137634e-07, "loss": 0.6388, "step": 12623 }, { "epoch": 0.88870116156283, "grad_norm": 1.9039044380187988, "learning_rate": 4.499361178831054e-07, "loss": 0.6713, "step": 12624 }, { "epoch": 0.8887715593101021, "grad_norm": 1.7000787258148193, "learning_rate": 4.4937328634858864e-07, "loss": 0.6211, "step": 12625 }, { "epoch": 0.8888419570573741, "grad_norm": 1.9217792749404907, "learning_rate": 4.488107953870876e-07, "loss": 0.6413, "step": 12626 }, { "epoch": 0.8889123548046463, "grad_norm": 2.1340763568878174, "learning_rate": 4.4824864502784654e-07, "loss": 0.7392, "step": 12627 }, { "epoch": 0.8889827525519184, "grad_norm": 2.5532572269439697, "learning_rate": 4.4768683530009055e-07, "loss": 0.6537, "step": 12628 }, { "epoch": 0.8890531502991904, "grad_norm": 2.724128007888794, "learning_rate": 4.4712536623303134e-07, "loss": 0.61, "step": 12629 }, { "epoch": 0.8891235480464625, "grad_norm": 1.9622584581375122, "learning_rate": 4.465642378558581e-07, "loss": 0.6468, "step": 12630 }, { "epoch": 0.8891939457937346, "grad_norm": 1.8023685216903687, "learning_rate": 4.46003450197746e-07, "loss": 0.6097, "step": 12631 }, { "epoch": 0.8892643435410067, "grad_norm": 2.103194236755371, "learning_rate": 4.454430032878509e-07, "loss": 0.6559, "step": 12632 }, { "epoch": 0.8893347412882788, "grad_norm": 2.0208394527435303, "learning_rate": 4.4488289715530913e-07, "loss": 0.7369, "step": 12633 }, { "epoch": 0.8894051390355509, "grad_norm": 2.5296177864074707, "learning_rate": 4.443231318292423e-07, "loss": 0.774, "step": 12634 }, { "epoch": 0.889475536782823, "grad_norm": 2.1311676502227783, "learning_rate": 4.4376370733875336e-07, "loss": 0.6271, "step": 12635 }, { "epoch": 0.889545934530095, "grad_norm": 1.8202016353607178, "learning_rate": 4.432046237129258e-07, "loss": 0.6154, "step": 12636 }, { "epoch": 0.8896163322773671, "grad_norm": 2.395353078842163, "learning_rate": 4.426458809808291e-07, "loss": 0.6625, "step": 12637 }, { "epoch": 0.8896867300246392, "grad_norm": 2.452648878097534, "learning_rate": 4.420874791715119e-07, "loss": 0.7539, "step": 12638 }, { "epoch": 0.8897571277719113, "grad_norm": 2.4652719497680664, "learning_rate": 4.4152941831400635e-07, "loss": 0.6552, "step": 12639 }, { "epoch": 0.8898275255191834, "grad_norm": 1.8555617332458496, "learning_rate": 4.409716984373245e-07, "loss": 0.5417, "step": 12640 }, { "epoch": 0.8898979232664554, "grad_norm": 1.5887924432754517, "learning_rate": 4.4041431957046516e-07, "loss": 0.5334, "step": 12641 }, { "epoch": 0.8899683210137276, "grad_norm": 2.2379753589630127, "learning_rate": 4.398572817424053e-07, "loss": 0.5867, "step": 12642 }, { "epoch": 0.8900387187609996, "grad_norm": 1.77045476436615, "learning_rate": 4.393005849821073e-07, "loss": 0.5764, "step": 12643 }, { "epoch": 0.8901091165082717, "grad_norm": 2.3526084423065186, "learning_rate": 4.38744229318514e-07, "loss": 0.587, "step": 12644 }, { "epoch": 0.8901795142555439, "grad_norm": 2.0011634826660156, "learning_rate": 4.3818821478055025e-07, "loss": 0.7181, "step": 12645 }, { "epoch": 0.8902499120028159, "grad_norm": 1.930196762084961, "learning_rate": 4.376325413971225e-07, "loss": 0.5916, "step": 12646 }, { "epoch": 0.890320309750088, "grad_norm": 1.971988320350647, "learning_rate": 4.37077209197123e-07, "loss": 0.6627, "step": 12647 }, { "epoch": 0.89039070749736, "grad_norm": 2.086435556411743, "learning_rate": 4.365222182094239e-07, "loss": 0.6197, "step": 12648 }, { "epoch": 0.8904611052446322, "grad_norm": 1.9747546911239624, "learning_rate": 4.3596756846287875e-07, "loss": 0.5652, "step": 12649 }, { "epoch": 0.8905315029919043, "grad_norm": 1.927749752998352, "learning_rate": 4.3541325998632383e-07, "loss": 0.726, "step": 12650 }, { "epoch": 0.8906019007391763, "grad_norm": 1.9744817018508911, "learning_rate": 4.348592928085785e-07, "loss": 0.5774, "step": 12651 }, { "epoch": 0.8906722984864485, "grad_norm": 2.151243209838867, "learning_rate": 4.343056669584456e-07, "loss": 0.7422, "step": 12652 }, { "epoch": 0.8907426962337205, "grad_norm": 2.133857250213623, "learning_rate": 4.337523824647057e-07, "loss": 0.6856, "step": 12653 }, { "epoch": 0.8908130939809926, "grad_norm": 1.9856460094451904, "learning_rate": 4.3319943935612745e-07, "loss": 0.7201, "step": 12654 }, { "epoch": 0.8908834917282646, "grad_norm": 2.055107593536377, "learning_rate": 4.3264683766145715e-07, "loss": 0.6206, "step": 12655 }, { "epoch": 0.8909538894755368, "grad_norm": 1.8936703205108643, "learning_rate": 4.320945774094262e-07, "loss": 0.6901, "step": 12656 }, { "epoch": 0.8910242872228089, "grad_norm": 1.836005687713623, "learning_rate": 4.315426586287444e-07, "loss": 0.5887, "step": 12657 }, { "epoch": 0.8910946849700809, "grad_norm": 1.5579479932785034, "learning_rate": 4.3099108134811045e-07, "loss": 0.5247, "step": 12658 }, { "epoch": 0.8911650827173531, "grad_norm": 1.9940286874771118, "learning_rate": 4.304398455961984e-07, "loss": 0.7357, "step": 12659 }, { "epoch": 0.8912354804646251, "grad_norm": 2.0340497493743896, "learning_rate": 4.2988895140166895e-07, "loss": 0.661, "step": 12660 }, { "epoch": 0.8913058782118972, "grad_norm": 1.8769004344940186, "learning_rate": 4.2933839879316336e-07, "loss": 0.6091, "step": 12661 }, { "epoch": 0.8913762759591694, "grad_norm": 1.9572727680206299, "learning_rate": 4.2878818779930593e-07, "loss": 0.6865, "step": 12662 }, { "epoch": 0.8914466737064414, "grad_norm": 1.9584788084030151, "learning_rate": 4.2823831844869984e-07, "loss": 0.6134, "step": 12663 }, { "epoch": 0.8915170714537135, "grad_norm": 1.6269088983535767, "learning_rate": 4.276887907699367e-07, "loss": 0.5742, "step": 12664 }, { "epoch": 0.8915874692009855, "grad_norm": 2.0361266136169434, "learning_rate": 4.2713960479158473e-07, "loss": 0.7153, "step": 12665 }, { "epoch": 0.8916578669482577, "grad_norm": 2.0586066246032715, "learning_rate": 4.2659076054219824e-07, "loss": 0.7005, "step": 12666 }, { "epoch": 0.8917282646955298, "grad_norm": 1.815468668937683, "learning_rate": 4.260422580503113e-07, "loss": 0.6341, "step": 12667 }, { "epoch": 0.8917986624428018, "grad_norm": 1.9471534490585327, "learning_rate": 4.254940973444415e-07, "loss": 0.5626, "step": 12668 }, { "epoch": 0.891869060190074, "grad_norm": 2.039745569229126, "learning_rate": 4.2494627845308587e-07, "loss": 0.5514, "step": 12669 }, { "epoch": 0.891939457937346, "grad_norm": 2.5373291969299316, "learning_rate": 4.243988014047301e-07, "loss": 0.6677, "step": 12670 }, { "epoch": 0.8920098556846181, "grad_norm": 1.5186189413070679, "learning_rate": 4.2385166622783363e-07, "loss": 0.5757, "step": 12671 }, { "epoch": 0.8920802534318901, "grad_norm": 1.9326879978179932, "learning_rate": 4.233048729508467e-07, "loss": 0.692, "step": 12672 }, { "epoch": 0.8921506511791623, "grad_norm": 2.410602331161499, "learning_rate": 4.227584216021953e-07, "loss": 0.6127, "step": 12673 }, { "epoch": 0.8922210489264344, "grad_norm": 2.6422340869903564, "learning_rate": 4.2221231221029074e-07, "loss": 0.627, "step": 12674 }, { "epoch": 0.8922914466737064, "grad_norm": 1.9146846532821655, "learning_rate": 4.216665448035233e-07, "loss": 0.7242, "step": 12675 }, { "epoch": 0.8923618444209785, "grad_norm": 2.0007271766662598, "learning_rate": 4.211211194102715e-07, "loss": 0.7023, "step": 12676 }, { "epoch": 0.8924322421682506, "grad_norm": 1.8550442457199097, "learning_rate": 4.2057603605889006e-07, "loss": 0.6761, "step": 12677 }, { "epoch": 0.8925026399155227, "grad_norm": 1.820585012435913, "learning_rate": 4.2003129477771936e-07, "loss": 0.6479, "step": 12678 }, { "epoch": 0.8925730376627948, "grad_norm": 1.8227121829986572, "learning_rate": 4.1948689559508143e-07, "loss": 0.6955, "step": 12679 }, { "epoch": 0.8926434354100669, "grad_norm": 1.7441926002502441, "learning_rate": 4.189428385392795e-07, "loss": 0.6291, "step": 12680 }, { "epoch": 0.892713833157339, "grad_norm": 2.3456947803497314, "learning_rate": 4.183991236385989e-07, "loss": 0.7299, "step": 12681 }, { "epoch": 0.892784230904611, "grad_norm": 2.203671455383301, "learning_rate": 4.178557509213087e-07, "loss": 0.6026, "step": 12682 }, { "epoch": 0.8928546286518831, "grad_norm": 1.7664738893508911, "learning_rate": 4.173127204156585e-07, "loss": 0.7034, "step": 12683 }, { "epoch": 0.8929250263991553, "grad_norm": 1.8282629251480103, "learning_rate": 4.1677003214988317e-07, "loss": 0.6397, "step": 12684 }, { "epoch": 0.8929954241464273, "grad_norm": 1.897026538848877, "learning_rate": 4.162276861521951e-07, "loss": 0.5903, "step": 12685 }, { "epoch": 0.8930658218936994, "grad_norm": 2.0793826580047607, "learning_rate": 4.156856824507925e-07, "loss": 0.6716, "step": 12686 }, { "epoch": 0.8931362196409715, "grad_norm": 2.1143579483032227, "learning_rate": 4.151440210738536e-07, "loss": 0.7654, "step": 12687 }, { "epoch": 0.8932066173882436, "grad_norm": 1.7290246486663818, "learning_rate": 4.1460270204954177e-07, "loss": 0.5228, "step": 12688 }, { "epoch": 0.8932770151355157, "grad_norm": 1.8596315383911133, "learning_rate": 4.140617254059977e-07, "loss": 0.5703, "step": 12689 }, { "epoch": 0.8933474128827877, "grad_norm": 1.9747642278671265, "learning_rate": 4.135210911713507e-07, "loss": 0.6762, "step": 12690 }, { "epoch": 0.8934178106300599, "grad_norm": 1.8441801071166992, "learning_rate": 4.1298079937370656e-07, "loss": 0.6229, "step": 12691 }, { "epoch": 0.8934882083773319, "grad_norm": 1.6450830698013306, "learning_rate": 4.1244085004115793e-07, "loss": 0.7024, "step": 12692 }, { "epoch": 0.893558606124604, "grad_norm": 1.7553074359893799, "learning_rate": 4.1190124320177334e-07, "loss": 0.7143, "step": 12693 }, { "epoch": 0.893629003871876, "grad_norm": 2.377546548843384, "learning_rate": 4.1136197888361125e-07, "loss": 0.6723, "step": 12694 }, { "epoch": 0.8936994016191482, "grad_norm": 1.6331195831298828, "learning_rate": 4.1082305711470525e-07, "loss": 0.6593, "step": 12695 }, { "epoch": 0.8937697993664203, "grad_norm": 1.8971028327941895, "learning_rate": 4.102844779230772e-07, "loss": 0.5626, "step": 12696 }, { "epoch": 0.8938401971136923, "grad_norm": 1.750231146812439, "learning_rate": 4.0974624133672577e-07, "loss": 0.6543, "step": 12697 }, { "epoch": 0.8939105948609645, "grad_norm": 2.030688524246216, "learning_rate": 4.092083473836371e-07, "loss": 0.7174, "step": 12698 }, { "epoch": 0.8939809926082365, "grad_norm": 1.862993597984314, "learning_rate": 4.086707960917756e-07, "loss": 0.6059, "step": 12699 }, { "epoch": 0.8940513903555086, "grad_norm": 1.9729101657867432, "learning_rate": 4.0813358748908776e-07, "loss": 0.6657, "step": 12700 }, { "epoch": 0.8941217881027808, "grad_norm": 2.405872344970703, "learning_rate": 4.0759672160350546e-07, "loss": 0.6074, "step": 12701 }, { "epoch": 0.8941921858500528, "grad_norm": 2.0389952659606934, "learning_rate": 4.0706019846293937e-07, "loss": 0.5382, "step": 12702 }, { "epoch": 0.8942625835973249, "grad_norm": 2.1494410037994385, "learning_rate": 4.065240180952856e-07, "loss": 0.7051, "step": 12703 }, { "epoch": 0.8943329813445969, "grad_norm": 2.3281142711639404, "learning_rate": 4.0598818052841914e-07, "loss": 0.576, "step": 12704 }, { "epoch": 0.8944033790918691, "grad_norm": 1.9307186603546143, "learning_rate": 4.0545268579019944e-07, "loss": 0.7282, "step": 12705 }, { "epoch": 0.8944737768391412, "grad_norm": 1.8863215446472168, "learning_rate": 4.0491753390846586e-07, "loss": 0.559, "step": 12706 }, { "epoch": 0.8945441745864132, "grad_norm": 2.196160078048706, "learning_rate": 4.0438272491104286e-07, "loss": 0.5142, "step": 12707 }, { "epoch": 0.8946145723336854, "grad_norm": 1.7630287408828735, "learning_rate": 4.038482588257348e-07, "loss": 0.5842, "step": 12708 }, { "epoch": 0.8946849700809574, "grad_norm": 1.9779359102249146, "learning_rate": 4.0331413568033125e-07, "loss": 0.687, "step": 12709 }, { "epoch": 0.8947553678282295, "grad_norm": 1.8714802265167236, "learning_rate": 4.0278035550259915e-07, "loss": 0.552, "step": 12710 }, { "epoch": 0.8948257655755015, "grad_norm": 1.8859127759933472, "learning_rate": 4.0224691832029157e-07, "loss": 0.5592, "step": 12711 }, { "epoch": 0.8948961633227737, "grad_norm": 1.96669340133667, "learning_rate": 4.017138241611413e-07, "loss": 0.6411, "step": 12712 }, { "epoch": 0.8949665610700458, "grad_norm": 1.7382675409317017, "learning_rate": 4.0118107305286563e-07, "loss": 0.5957, "step": 12713 }, { "epoch": 0.8950369588173178, "grad_norm": 2.4964847564697266, "learning_rate": 4.0064866502316085e-07, "loss": 0.7347, "step": 12714 }, { "epoch": 0.89510735656459, "grad_norm": 1.8137723207473755, "learning_rate": 4.0011660009971003e-07, "loss": 0.6441, "step": 12715 }, { "epoch": 0.895177754311862, "grad_norm": 1.9350216388702393, "learning_rate": 3.995848783101753e-07, "loss": 0.7188, "step": 12716 }, { "epoch": 0.8952481520591341, "grad_norm": 2.6836845874786377, "learning_rate": 3.990534996821993e-07, "loss": 0.672, "step": 12717 }, { "epoch": 0.8953185498064062, "grad_norm": 1.407375454902649, "learning_rate": 3.985224642434091e-07, "loss": 0.7466, "step": 12718 }, { "epoch": 0.8953889475536783, "grad_norm": 2.1451542377471924, "learning_rate": 3.9799177202141633e-07, "loss": 0.6636, "step": 12719 }, { "epoch": 0.8954593453009504, "grad_norm": 1.5805013179779053, "learning_rate": 3.974614230438085e-07, "loss": 0.6829, "step": 12720 }, { "epoch": 0.8955297430482224, "grad_norm": 2.174046277999878, "learning_rate": 3.969314173381629e-07, "loss": 0.639, "step": 12721 }, { "epoch": 0.8956001407954945, "grad_norm": 2.1297147274017334, "learning_rate": 3.964017549320322e-07, "loss": 0.7597, "step": 12722 }, { "epoch": 0.8956705385427667, "grad_norm": 2.342426300048828, "learning_rate": 3.958724358529556e-07, "loss": 0.7285, "step": 12723 }, { "epoch": 0.8957409362900387, "grad_norm": 2.6220457553863525, "learning_rate": 3.953434601284507e-07, "loss": 0.7247, "step": 12724 }, { "epoch": 0.8958113340373108, "grad_norm": 2.3223671913146973, "learning_rate": 3.948148277860217e-07, "loss": 0.6116, "step": 12725 }, { "epoch": 0.8958817317845829, "grad_norm": 2.9614992141723633, "learning_rate": 3.9428653885315054e-07, "loss": 0.6774, "step": 12726 }, { "epoch": 0.895952129531855, "grad_norm": 2.4214603900909424, "learning_rate": 3.9375859335730653e-07, "loss": 0.7084, "step": 12727 }, { "epoch": 0.8960225272791271, "grad_norm": 2.5057008266448975, "learning_rate": 3.9323099132593577e-07, "loss": 0.625, "step": 12728 }, { "epoch": 0.8960929250263991, "grad_norm": 1.738076090812683, "learning_rate": 3.927037327864694e-07, "loss": 0.5931, "step": 12729 }, { "epoch": 0.8961633227736713, "grad_norm": 2.3016772270202637, "learning_rate": 3.921768177663186e-07, "loss": 0.6065, "step": 12730 }, { "epoch": 0.8962337205209433, "grad_norm": 1.7701576948165894, "learning_rate": 3.9165024629288047e-07, "loss": 0.5428, "step": 12731 }, { "epoch": 0.8963041182682154, "grad_norm": 1.8990041017532349, "learning_rate": 3.911240183935303e-07, "loss": 0.6474, "step": 12732 }, { "epoch": 0.8963745160154875, "grad_norm": 1.805142879486084, "learning_rate": 3.905981340956278e-07, "loss": 0.6213, "step": 12733 }, { "epoch": 0.8964449137627596, "grad_norm": 2.0903775691986084, "learning_rate": 3.90072593426515e-07, "loss": 0.6795, "step": 12734 }, { "epoch": 0.8965153115100317, "grad_norm": 2.172001838684082, "learning_rate": 3.895473964135143e-07, "loss": 0.6873, "step": 12735 }, { "epoch": 0.8965857092573037, "grad_norm": 2.584003210067749, "learning_rate": 3.8902254308393036e-07, "loss": 0.684, "step": 12736 }, { "epoch": 0.8966561070045759, "grad_norm": 1.8173872232437134, "learning_rate": 3.884980334650522e-07, "loss": 0.5839, "step": 12737 }, { "epoch": 0.8967265047518479, "grad_norm": 2.1484858989715576, "learning_rate": 3.8797386758414785e-07, "loss": 0.666, "step": 12738 }, { "epoch": 0.89679690249912, "grad_norm": 3.101059913635254, "learning_rate": 3.8745004546847227e-07, "loss": 0.7095, "step": 12739 }, { "epoch": 0.8968673002463922, "grad_norm": 1.8578574657440186, "learning_rate": 3.8692656714525696e-07, "loss": 0.6334, "step": 12740 }, { "epoch": 0.8969376979936642, "grad_norm": 2.14135479927063, "learning_rate": 3.8640343264171796e-07, "loss": 0.6967, "step": 12741 }, { "epoch": 0.8970080957409363, "grad_norm": 2.084247589111328, "learning_rate": 3.858806419850541e-07, "loss": 0.6307, "step": 12742 }, { "epoch": 0.8970784934882083, "grad_norm": 2.465562582015991, "learning_rate": 3.853581952024465e-07, "loss": 0.6801, "step": 12743 }, { "epoch": 0.8971488912354805, "grad_norm": 2.085970401763916, "learning_rate": 3.848360923210552e-07, "loss": 0.7281, "step": 12744 }, { "epoch": 0.8972192889827526, "grad_norm": 1.9784350395202637, "learning_rate": 3.8431433336802854e-07, "loss": 0.6766, "step": 12745 }, { "epoch": 0.8972896867300246, "grad_norm": 2.0120532512664795, "learning_rate": 3.837929183704909e-07, "loss": 0.7519, "step": 12746 }, { "epoch": 0.8973600844772968, "grad_norm": 2.0796923637390137, "learning_rate": 3.8327184735555017e-07, "loss": 0.653, "step": 12747 }, { "epoch": 0.8974304822245688, "grad_norm": 2.0237269401550293, "learning_rate": 3.827511203502997e-07, "loss": 0.7329, "step": 12748 }, { "epoch": 0.8975008799718409, "grad_norm": 2.126702070236206, "learning_rate": 3.8223073738181003e-07, "loss": 0.8473, "step": 12749 }, { "epoch": 0.8975712777191129, "grad_norm": 1.6316657066345215, "learning_rate": 3.8171069847713947e-07, "loss": 0.6875, "step": 12750 }, { "epoch": 0.8976416754663851, "grad_norm": 2.3432180881500244, "learning_rate": 3.811910036633229e-07, "loss": 0.6506, "step": 12751 }, { "epoch": 0.8977120732136572, "grad_norm": 2.431824207305908, "learning_rate": 3.806716529673806e-07, "loss": 0.8177, "step": 12752 }, { "epoch": 0.8977824709609292, "grad_norm": 1.7998286485671997, "learning_rate": 3.801526464163132e-07, "loss": 0.6041, "step": 12753 }, { "epoch": 0.8978528687082014, "grad_norm": 1.8347160816192627, "learning_rate": 3.796339840371059e-07, "loss": 0.703, "step": 12754 }, { "epoch": 0.8979232664554734, "grad_norm": 2.0737791061401367, "learning_rate": 3.791156658567222e-07, "loss": 0.6715, "step": 12755 }, { "epoch": 0.8979936642027455, "grad_norm": 1.8967783451080322, "learning_rate": 3.785976919021122e-07, "loss": 0.7001, "step": 12756 }, { "epoch": 0.8980640619500176, "grad_norm": 2.0396907329559326, "learning_rate": 3.780800622002053e-07, "loss": 0.6604, "step": 12757 }, { "epoch": 0.8981344596972897, "grad_norm": 2.4060018062591553, "learning_rate": 3.7756277677791274e-07, "loss": 0.5937, "step": 12758 }, { "epoch": 0.8982048574445618, "grad_norm": 1.9974687099456787, "learning_rate": 3.7704583566212876e-07, "loss": 0.7244, "step": 12759 }, { "epoch": 0.8982752551918338, "grad_norm": 2.4429128170013428, "learning_rate": 3.765292388797307e-07, "loss": 0.6131, "step": 12760 }, { "epoch": 0.898345652939106, "grad_norm": 1.806418538093567, "learning_rate": 3.760129864575745e-07, "loss": 0.7047, "step": 12761 }, { "epoch": 0.8984160506863781, "grad_norm": 1.8437045812606812, "learning_rate": 3.7549707842250335e-07, "loss": 0.6921, "step": 12762 }, { "epoch": 0.8984864484336501, "grad_norm": 1.7365573644638062, "learning_rate": 3.7498151480133835e-07, "loss": 0.6237, "step": 12763 }, { "epoch": 0.8985568461809222, "grad_norm": 1.713553786277771, "learning_rate": 3.7446629562088515e-07, "loss": 0.6486, "step": 12764 }, { "epoch": 0.8986272439281943, "grad_norm": 1.8129632472991943, "learning_rate": 3.7395142090792853e-07, "loss": 0.6672, "step": 12765 }, { "epoch": 0.8986976416754664, "grad_norm": 1.9399206638336182, "learning_rate": 3.734368906892391e-07, "loss": 0.7274, "step": 12766 }, { "epoch": 0.8987680394227384, "grad_norm": 2.4641377925872803, "learning_rate": 3.729227049915659e-07, "loss": 0.6882, "step": 12767 }, { "epoch": 0.8988384371700106, "grad_norm": 2.2354109287261963, "learning_rate": 3.724088638416445e-07, "loss": 0.6854, "step": 12768 }, { "epoch": 0.8989088349172827, "grad_norm": 2.4282901287078857, "learning_rate": 3.718953672661882e-07, "loss": 0.6613, "step": 12769 }, { "epoch": 0.8989792326645547, "grad_norm": 2.288916826248169, "learning_rate": 3.713822152918946e-07, "loss": 0.6422, "step": 12770 }, { "epoch": 0.8990496304118268, "grad_norm": 2.0670173168182373, "learning_rate": 3.70869407945442e-07, "loss": 0.6026, "step": 12771 }, { "epoch": 0.8991200281590989, "grad_norm": 1.9988847970962524, "learning_rate": 3.703569452534937e-07, "loss": 0.6016, "step": 12772 }, { "epoch": 0.899190425906371, "grad_norm": 1.838800311088562, "learning_rate": 3.6984482724269074e-07, "loss": 0.6439, "step": 12773 }, { "epoch": 0.8992608236536431, "grad_norm": 2.296947717666626, "learning_rate": 3.6933305393966076e-07, "loss": 0.6726, "step": 12774 }, { "epoch": 0.8993312214009151, "grad_norm": 2.171412467956543, "learning_rate": 3.6882162537101124e-07, "loss": 0.669, "step": 12775 }, { "epoch": 0.8994016191481873, "grad_norm": 1.9719966650009155, "learning_rate": 3.683105415633303e-07, "loss": 0.6021, "step": 12776 }, { "epoch": 0.8994720168954593, "grad_norm": 2.230123519897461, "learning_rate": 3.677998025431889e-07, "loss": 0.7535, "step": 12777 }, { "epoch": 0.8995424146427314, "grad_norm": 2.4460668563842773, "learning_rate": 3.6728940833714397e-07, "loss": 0.7305, "step": 12778 }, { "epoch": 0.8996128123900036, "grad_norm": 2.2440969944000244, "learning_rate": 3.6677935897172843e-07, "loss": 0.5962, "step": 12779 }, { "epoch": 0.8996832101372756, "grad_norm": 2.0131995677948, "learning_rate": 3.6626965447346196e-07, "loss": 0.6669, "step": 12780 }, { "epoch": 0.8997536078845477, "grad_norm": 2.241041660308838, "learning_rate": 3.6576029486884476e-07, "loss": 0.617, "step": 12781 }, { "epoch": 0.8998240056318197, "grad_norm": 2.0290729999542236, "learning_rate": 3.652512801843577e-07, "loss": 0.5318, "step": 12782 }, { "epoch": 0.8998944033790919, "grad_norm": 2.052516222000122, "learning_rate": 3.647426104464645e-07, "loss": 0.7506, "step": 12783 }, { "epoch": 0.899964801126364, "grad_norm": 2.6618399620056152, "learning_rate": 3.6423428568161255e-07, "loss": 0.7495, "step": 12784 }, { "epoch": 0.900035198873636, "grad_norm": 2.049405336380005, "learning_rate": 3.637263059162298e-07, "loss": 0.7616, "step": 12785 }, { "epoch": 0.9001055966209082, "grad_norm": 1.9660004377365112, "learning_rate": 3.632186711767271e-07, "loss": 0.5933, "step": 12786 }, { "epoch": 0.9001759943681802, "grad_norm": 2.071615695953369, "learning_rate": 3.6271138148949677e-07, "loss": 0.5644, "step": 12787 }, { "epoch": 0.9002463921154523, "grad_norm": 2.2999231815338135, "learning_rate": 3.622044368809123e-07, "loss": 0.6951, "step": 12788 }, { "epoch": 0.9003167898627243, "grad_norm": 2.2266645431518555, "learning_rate": 3.616978373773294e-07, "loss": 0.5845, "step": 12789 }, { "epoch": 0.9003871876099965, "grad_norm": 2.211724042892456, "learning_rate": 3.611915830050898e-07, "loss": 0.6247, "step": 12790 }, { "epoch": 0.9004575853572686, "grad_norm": 2.1906816959381104, "learning_rate": 3.606856737905103e-07, "loss": 0.5441, "step": 12791 }, { "epoch": 0.9005279831045406, "grad_norm": 2.0404815673828125, "learning_rate": 3.601801097598969e-07, "loss": 0.6288, "step": 12792 }, { "epoch": 0.9005983808518128, "grad_norm": 2.127931833267212, "learning_rate": 3.5967489093953376e-07, "loss": 0.6777, "step": 12793 }, { "epoch": 0.9006687785990848, "grad_norm": 1.8189016580581665, "learning_rate": 3.5917001735568567e-07, "loss": 0.6455, "step": 12794 }, { "epoch": 0.9007391763463569, "grad_norm": 2.260509729385376, "learning_rate": 3.586654890346026e-07, "loss": 0.6994, "step": 12795 }, { "epoch": 0.900809574093629, "grad_norm": 1.9314017295837402, "learning_rate": 3.5816130600251595e-07, "loss": 0.7092, "step": 12796 }, { "epoch": 0.9008799718409011, "grad_norm": 2.261931896209717, "learning_rate": 3.576574682856376e-07, "loss": 0.5837, "step": 12797 }, { "epoch": 0.9009503695881732, "grad_norm": 2.093508005142212, "learning_rate": 3.5715397591016317e-07, "loss": 0.5798, "step": 12798 }, { "epoch": 0.9010207673354452, "grad_norm": 2.929140329360962, "learning_rate": 3.566508289022704e-07, "loss": 0.5784, "step": 12799 }, { "epoch": 0.9010911650827174, "grad_norm": 1.8963792324066162, "learning_rate": 3.561480272881176e-07, "loss": 0.5878, "step": 12800 }, { "epoch": 0.9011615628299895, "grad_norm": 1.8582444190979004, "learning_rate": 3.5564557109384586e-07, "loss": 0.6156, "step": 12801 }, { "epoch": 0.9012319605772615, "grad_norm": 2.0806281566619873, "learning_rate": 3.551434603455771e-07, "loss": 0.733, "step": 12802 }, { "epoch": 0.9013023583245336, "grad_norm": 1.9748127460479736, "learning_rate": 3.54641695069419e-07, "loss": 0.6122, "step": 12803 }, { "epoch": 0.9013727560718057, "grad_norm": 2.2677619457244873, "learning_rate": 3.5414027529145685e-07, "loss": 0.6485, "step": 12804 }, { "epoch": 0.9014431538190778, "grad_norm": 2.0273773670196533, "learning_rate": 3.5363920103776115e-07, "loss": 0.5757, "step": 12805 }, { "epoch": 0.9015135515663498, "grad_norm": 2.1722378730773926, "learning_rate": 3.531384723343829e-07, "loss": 0.5686, "step": 12806 }, { "epoch": 0.901583949313622, "grad_norm": 1.9806171655654907, "learning_rate": 3.5263808920735527e-07, "loss": 0.6125, "step": 12807 }, { "epoch": 0.9016543470608941, "grad_norm": 2.1067287921905518, "learning_rate": 3.5213805168269283e-07, "loss": 0.8511, "step": 12808 }, { "epoch": 0.9017247448081661, "grad_norm": 2.519634962081909, "learning_rate": 3.5163835978639457e-07, "loss": 0.6586, "step": 12809 }, { "epoch": 0.9017951425554382, "grad_norm": 1.8084743022918701, "learning_rate": 3.5113901354443765e-07, "loss": 0.6227, "step": 12810 }, { "epoch": 0.9018655403027103, "grad_norm": 1.6561353206634521, "learning_rate": 3.506400129827869e-07, "loss": 0.6574, "step": 12811 }, { "epoch": 0.9019359380499824, "grad_norm": 1.8989489078521729, "learning_rate": 3.50141358127383e-07, "loss": 0.6935, "step": 12812 }, { "epoch": 0.9020063357972545, "grad_norm": 2.3569931983947754, "learning_rate": 3.496430490041527e-07, "loss": 0.6914, "step": 12813 }, { "epoch": 0.9020767335445266, "grad_norm": 1.9794529676437378, "learning_rate": 3.491450856390024e-07, "loss": 0.578, "step": 12814 }, { "epoch": 0.9021471312917987, "grad_norm": 1.936430811882019, "learning_rate": 3.486474680578231e-07, "loss": 0.6688, "step": 12815 }, { "epoch": 0.9022175290390707, "grad_norm": 1.8899744749069214, "learning_rate": 3.481501962864848e-07, "loss": 0.6176, "step": 12816 }, { "epoch": 0.9022879267863428, "grad_norm": 1.9729769229888916, "learning_rate": 3.4765327035084345e-07, "loss": 0.6852, "step": 12817 }, { "epoch": 0.902358324533615, "grad_norm": 1.7387604713439941, "learning_rate": 3.4715669027673334e-07, "loss": 0.7151, "step": 12818 }, { "epoch": 0.902428722280887, "grad_norm": 2.793734550476074, "learning_rate": 3.4666045608997154e-07, "loss": 0.6514, "step": 12819 }, { "epoch": 0.9024991200281591, "grad_norm": 2.391604423522949, "learning_rate": 3.461645678163573e-07, "loss": 0.7935, "step": 12820 }, { "epoch": 0.9025695177754312, "grad_norm": 2.06465744972229, "learning_rate": 3.4566902548167435e-07, "loss": 0.7444, "step": 12821 }, { "epoch": 0.9026399155227033, "grad_norm": 2.3096909523010254, "learning_rate": 3.451738291116846e-07, "loss": 0.6121, "step": 12822 }, { "epoch": 0.9027103132699753, "grad_norm": 1.8359850645065308, "learning_rate": 3.4467897873213524e-07, "loss": 0.536, "step": 12823 }, { "epoch": 0.9027807110172474, "grad_norm": 2.5471737384796143, "learning_rate": 3.4418447436875334e-07, "loss": 0.6599, "step": 12824 }, { "epoch": 0.9028511087645196, "grad_norm": 1.7400717735290527, "learning_rate": 3.436903160472479e-07, "loss": 0.5602, "step": 12825 }, { "epoch": 0.9029215065117916, "grad_norm": 1.8394352197647095, "learning_rate": 3.431965037933109e-07, "loss": 0.6697, "step": 12826 }, { "epoch": 0.9029919042590637, "grad_norm": 2.6791720390319824, "learning_rate": 3.4270303763261733e-07, "loss": 0.7471, "step": 12827 }, { "epoch": 0.9030623020063357, "grad_norm": 1.7802547216415405, "learning_rate": 3.422099175908203e-07, "loss": 0.5254, "step": 12828 }, { "epoch": 0.9031326997536079, "grad_norm": 2.0168209075927734, "learning_rate": 3.417171436935613e-07, "loss": 0.6435, "step": 12829 }, { "epoch": 0.90320309750088, "grad_norm": 2.1399827003479004, "learning_rate": 3.4122471596645775e-07, "loss": 0.7143, "step": 12830 }, { "epoch": 0.903273495248152, "grad_norm": 2.0741162300109863, "learning_rate": 3.407326344351115e-07, "loss": 0.6574, "step": 12831 }, { "epoch": 0.9033438929954242, "grad_norm": 1.9227501153945923, "learning_rate": 3.4024089912510567e-07, "loss": 0.5754, "step": 12832 }, { "epoch": 0.9034142907426962, "grad_norm": 3.1551308631896973, "learning_rate": 3.3974951006200803e-07, "loss": 0.6727, "step": 12833 }, { "epoch": 0.9034846884899683, "grad_norm": 2.6464955806732178, "learning_rate": 3.3925846727136445e-07, "loss": 0.657, "step": 12834 }, { "epoch": 0.9035550862372405, "grad_norm": 1.7021260261535645, "learning_rate": 3.38767770778706e-07, "loss": 0.5236, "step": 12835 }, { "epoch": 0.9036254839845125, "grad_norm": 2.599956750869751, "learning_rate": 3.3827742060954444e-07, "loss": 0.7633, "step": 12836 }, { "epoch": 0.9036958817317846, "grad_norm": 1.7120383977890015, "learning_rate": 3.3778741678937283e-07, "loss": 0.5652, "step": 12837 }, { "epoch": 0.9037662794790566, "grad_norm": 2.0349061489105225, "learning_rate": 3.3729775934366557e-07, "loss": 0.6224, "step": 12838 }, { "epoch": 0.9038366772263288, "grad_norm": 2.4620511531829834, "learning_rate": 3.368084482978838e-07, "loss": 0.6314, "step": 12839 }, { "epoch": 0.9039070749736009, "grad_norm": 2.026829719543457, "learning_rate": 3.363194836774639e-07, "loss": 0.6376, "step": 12840 }, { "epoch": 0.9039774727208729, "grad_norm": 1.983805775642395, "learning_rate": 3.358308655078297e-07, "loss": 0.5969, "step": 12841 }, { "epoch": 0.904047870468145, "grad_norm": 2.1517233848571777, "learning_rate": 3.3534259381438413e-07, "loss": 0.665, "step": 12842 }, { "epoch": 0.9041182682154171, "grad_norm": 1.6961874961853027, "learning_rate": 3.348546686225138e-07, "loss": 0.7299, "step": 12843 }, { "epoch": 0.9041886659626892, "grad_norm": 1.9158265590667725, "learning_rate": 3.343670899575842e-07, "loss": 0.6425, "step": 12844 }, { "epoch": 0.9042590637099612, "grad_norm": 1.935977816581726, "learning_rate": 3.338798578449471e-07, "loss": 0.5472, "step": 12845 }, { "epoch": 0.9043294614572334, "grad_norm": 1.5335156917572021, "learning_rate": 3.3339297230993224e-07, "loss": 0.5999, "step": 12846 }, { "epoch": 0.9043998592045055, "grad_norm": 1.9072974920272827, "learning_rate": 3.3290643337785553e-07, "loss": 0.6188, "step": 12847 }, { "epoch": 0.9044702569517775, "grad_norm": 2.341827392578125, "learning_rate": 3.324202410740118e-07, "loss": 0.7135, "step": 12848 }, { "epoch": 0.9045406546990497, "grad_norm": 2.4039247035980225, "learning_rate": 3.319343954236773e-07, "loss": 0.5868, "step": 12849 }, { "epoch": 0.9046110524463217, "grad_norm": 2.480161666870117, "learning_rate": 3.314488964521135e-07, "loss": 0.7152, "step": 12850 }, { "epoch": 0.9046814501935938, "grad_norm": 2.279153823852539, "learning_rate": 3.309637441845593e-07, "loss": 0.6372, "step": 12851 }, { "epoch": 0.9047518479408659, "grad_norm": 2.2725799083709717, "learning_rate": 3.3047893864624124e-07, "loss": 0.5783, "step": 12852 }, { "epoch": 0.904822245688138, "grad_norm": 1.8363969326019287, "learning_rate": 3.29994479862364e-07, "loss": 0.6459, "step": 12853 }, { "epoch": 0.9048926434354101, "grad_norm": 1.9575806856155396, "learning_rate": 3.295103678581136e-07, "loss": 0.6621, "step": 12854 }, { "epoch": 0.9049630411826821, "grad_norm": 2.1380631923675537, "learning_rate": 3.290266026586599e-07, "loss": 0.6271, "step": 12855 }, { "epoch": 0.9050334389299542, "grad_norm": 2.2080421447753906, "learning_rate": 3.285431842891555e-07, "loss": 0.6841, "step": 12856 }, { "epoch": 0.9051038366772264, "grad_norm": 2.043266534805298, "learning_rate": 3.2806011277473214e-07, "loss": 0.603, "step": 12857 }, { "epoch": 0.9051742344244984, "grad_norm": 2.1221463680267334, "learning_rate": 3.2757738814050665e-07, "loss": 0.6724, "step": 12858 }, { "epoch": 0.9052446321717705, "grad_norm": 2.029904365539551, "learning_rate": 3.270950104115757e-07, "loss": 0.712, "step": 12859 }, { "epoch": 0.9053150299190426, "grad_norm": 1.723382830619812, "learning_rate": 3.2661297961301907e-07, "loss": 0.5654, "step": 12860 }, { "epoch": 0.9053854276663147, "grad_norm": 2.1475448608398438, "learning_rate": 3.2613129576989594e-07, "loss": 0.7168, "step": 12861 }, { "epoch": 0.9054558254135867, "grad_norm": 1.9974924325942993, "learning_rate": 3.256499589072519e-07, "loss": 0.5738, "step": 12862 }, { "epoch": 0.9055262231608588, "grad_norm": 2.3779542446136475, "learning_rate": 3.251689690501105e-07, "loss": 0.6544, "step": 12863 }, { "epoch": 0.905596620908131, "grad_norm": 1.848689317703247, "learning_rate": 3.2468832622347984e-07, "loss": 0.6981, "step": 12864 }, { "epoch": 0.905667018655403, "grad_norm": 2.2935121059417725, "learning_rate": 3.242080304523487e-07, "loss": 0.6354, "step": 12865 }, { "epoch": 0.9057374164026751, "grad_norm": 1.865422010421753, "learning_rate": 3.2372808176168853e-07, "loss": 0.6466, "step": 12866 }, { "epoch": 0.9058078141499472, "grad_norm": 2.3552417755126953, "learning_rate": 3.232484801764508e-07, "loss": 0.7121, "step": 12867 }, { "epoch": 0.9058782118972193, "grad_norm": 1.6709052324295044, "learning_rate": 3.227692257215721e-07, "loss": 0.5838, "step": 12868 }, { "epoch": 0.9059486096444914, "grad_norm": 1.9453078508377075, "learning_rate": 3.22290318421968e-07, "loss": 0.6875, "step": 12869 }, { "epoch": 0.9060190073917634, "grad_norm": 1.6113935708999634, "learning_rate": 3.218117583025386e-07, "loss": 0.6246, "step": 12870 }, { "epoch": 0.9060894051390356, "grad_norm": 1.9072312116622925, "learning_rate": 3.2133354538816383e-07, "loss": 0.6494, "step": 12871 }, { "epoch": 0.9061598028863076, "grad_norm": 1.6125556230545044, "learning_rate": 3.208556797037064e-07, "loss": 0.5431, "step": 12872 }, { "epoch": 0.9062302006335797, "grad_norm": 2.9508135318756104, "learning_rate": 3.203781612740105e-07, "loss": 0.8712, "step": 12873 }, { "epoch": 0.9063005983808519, "grad_norm": 2.1149542331695557, "learning_rate": 3.199009901239047e-07, "loss": 0.7053, "step": 12874 }, { "epoch": 0.9063709961281239, "grad_norm": 2.007214307785034, "learning_rate": 3.19424166278195e-07, "loss": 0.7127, "step": 12875 }, { "epoch": 0.906441393875396, "grad_norm": 1.7325917482376099, "learning_rate": 3.1894768976167433e-07, "loss": 0.6148, "step": 12876 }, { "epoch": 0.906511791622668, "grad_norm": 2.2876899242401123, "learning_rate": 3.184715605991144e-07, "loss": 0.6835, "step": 12877 }, { "epoch": 0.9065821893699402, "grad_norm": 2.0610899925231934, "learning_rate": 3.179957788152685e-07, "loss": 0.6274, "step": 12878 }, { "epoch": 0.9066525871172122, "grad_norm": 2.0056865215301514, "learning_rate": 3.1752034443487273e-07, "loss": 0.6549, "step": 12879 }, { "epoch": 0.9067229848644843, "grad_norm": 2.2209813594818115, "learning_rate": 3.170452574826477e-07, "loss": 0.7177, "step": 12880 }, { "epoch": 0.9067933826117565, "grad_norm": 1.6608116626739502, "learning_rate": 3.165705179832905e-07, "loss": 0.5956, "step": 12881 }, { "epoch": 0.9068637803590285, "grad_norm": 1.812481164932251, "learning_rate": 3.1609612596148605e-07, "loss": 0.592, "step": 12882 }, { "epoch": 0.9069341781063006, "grad_norm": 1.9805703163146973, "learning_rate": 3.156220814418974e-07, "loss": 0.6844, "step": 12883 }, { "epoch": 0.9070045758535726, "grad_norm": 2.609215259552002, "learning_rate": 3.151483844491713e-07, "loss": 0.6554, "step": 12884 }, { "epoch": 0.9070749736008448, "grad_norm": 1.7005317211151123, "learning_rate": 3.1467503500793257e-07, "loss": 0.7062, "step": 12885 }, { "epoch": 0.9071453713481169, "grad_norm": 2.3874008655548096, "learning_rate": 3.1420203314279547e-07, "loss": 0.6532, "step": 12886 }, { "epoch": 0.9072157690953889, "grad_norm": 2.201958417892456, "learning_rate": 3.137293788783476e-07, "loss": 0.5865, "step": 12887 }, { "epoch": 0.9072861668426611, "grad_norm": 2.382857322692871, "learning_rate": 3.1325707223916654e-07, "loss": 0.5585, "step": 12888 }, { "epoch": 0.9073565645899331, "grad_norm": 1.9099528789520264, "learning_rate": 3.1278511324980573e-07, "loss": 0.6919, "step": 12889 }, { "epoch": 0.9074269623372052, "grad_norm": 1.8682796955108643, "learning_rate": 3.123135019348039e-07, "loss": 0.6485, "step": 12890 }, { "epoch": 0.9074973600844773, "grad_norm": 1.8306677341461182, "learning_rate": 3.1184223831867797e-07, "loss": 0.5813, "step": 12891 }, { "epoch": 0.9075677578317494, "grad_norm": 1.8511178493499756, "learning_rate": 3.1137132242593247e-07, "loss": 0.6978, "step": 12892 }, { "epoch": 0.9076381555790215, "grad_norm": 2.71256947517395, "learning_rate": 3.109007542810486e-07, "loss": 0.5565, "step": 12893 }, { "epoch": 0.9077085533262935, "grad_norm": 2.013268232345581, "learning_rate": 3.104305339084935e-07, "loss": 0.6849, "step": 12894 }, { "epoch": 0.9077789510735657, "grad_norm": 1.8601024150848389, "learning_rate": 3.099606613327135e-07, "loss": 0.6463, "step": 12895 }, { "epoch": 0.9078493488208378, "grad_norm": 1.8847731351852417, "learning_rate": 3.094911365781376e-07, "loss": 0.6595, "step": 12896 }, { "epoch": 0.9079197465681098, "grad_norm": 2.0277316570281982, "learning_rate": 3.090219596691757e-07, "loss": 0.5459, "step": 12897 }, { "epoch": 0.9079901443153819, "grad_norm": 2.359217643737793, "learning_rate": 3.085531306302217e-07, "loss": 0.5499, "step": 12898 }, { "epoch": 0.908060542062654, "grad_norm": 2.085557222366333, "learning_rate": 3.080846494856521e-07, "loss": 0.5772, "step": 12899 }, { "epoch": 0.9081309398099261, "grad_norm": 1.8098909854888916, "learning_rate": 3.0761651625982043e-07, "loss": 0.5923, "step": 12900 }, { "epoch": 0.9082013375571981, "grad_norm": 3.088669538497925, "learning_rate": 3.0714873097706896e-07, "loss": 0.5931, "step": 12901 }, { "epoch": 0.9082717353044703, "grad_norm": 2.3344790935516357, "learning_rate": 3.066812936617148e-07, "loss": 0.6671, "step": 12902 }, { "epoch": 0.9083421330517424, "grad_norm": 2.184030055999756, "learning_rate": 3.0621420433806355e-07, "loss": 0.6498, "step": 12903 }, { "epoch": 0.9084125307990144, "grad_norm": 1.6947458982467651, "learning_rate": 3.057474630303959e-07, "loss": 0.6077, "step": 12904 }, { "epoch": 0.9084829285462865, "grad_norm": 2.3132050037384033, "learning_rate": 3.052810697629824e-07, "loss": 0.7431, "step": 12905 }, { "epoch": 0.9085533262935586, "grad_norm": 1.8850780725479126, "learning_rate": 3.0481502456006724e-07, "loss": 0.686, "step": 12906 }, { "epoch": 0.9086237240408307, "grad_norm": 1.936397671699524, "learning_rate": 3.043493274458837e-07, "loss": 0.6904, "step": 12907 }, { "epoch": 0.9086941217881028, "grad_norm": 2.5216119289398193, "learning_rate": 3.038839784446433e-07, "loss": 0.5572, "step": 12908 }, { "epoch": 0.9087645195353748, "grad_norm": 2.46268630027771, "learning_rate": 3.0341897758053817e-07, "loss": 0.5814, "step": 12909 }, { "epoch": 0.908834917282647, "grad_norm": 2.907092332839966, "learning_rate": 3.0295432487774473e-07, "loss": 0.5944, "step": 12910 }, { "epoch": 0.908905315029919, "grad_norm": 1.8788238763809204, "learning_rate": 3.024900203604226e-07, "loss": 0.6825, "step": 12911 }, { "epoch": 0.9089757127771911, "grad_norm": 1.6942845582962036, "learning_rate": 3.020260640527086e-07, "loss": 0.751, "step": 12912 }, { "epoch": 0.9090461105244633, "grad_norm": 2.387601375579834, "learning_rate": 3.015624559787257e-07, "loss": 0.6126, "step": 12913 }, { "epoch": 0.9091165082717353, "grad_norm": 1.8445591926574707, "learning_rate": 3.010991961625781e-07, "loss": 0.6721, "step": 12914 }, { "epoch": 0.9091869060190074, "grad_norm": 1.9884434938430786, "learning_rate": 3.006362846283499e-07, "loss": 0.5458, "step": 12915 }, { "epoch": 0.9092573037662794, "grad_norm": 1.6827765703201294, "learning_rate": 3.001737214001081e-07, "loss": 0.5919, "step": 12916 }, { "epoch": 0.9093277015135516, "grad_norm": 2.0639185905456543, "learning_rate": 2.997115065019026e-07, "loss": 0.6093, "step": 12917 }, { "epoch": 0.9093980992608236, "grad_norm": 2.175994873046875, "learning_rate": 2.9924963995776297e-07, "loss": 0.7045, "step": 12918 }, { "epoch": 0.9094684970080957, "grad_norm": 2.692185401916504, "learning_rate": 2.9878812179170423e-07, "loss": 0.6566, "step": 12919 }, { "epoch": 0.9095388947553679, "grad_norm": 1.9929656982421875, "learning_rate": 2.9832695202772017e-07, "loss": 0.5973, "step": 12920 }, { "epoch": 0.9096092925026399, "grad_norm": 1.713752269744873, "learning_rate": 2.9786613068978694e-07, "loss": 0.6728, "step": 12921 }, { "epoch": 0.909679690249912, "grad_norm": 2.154620409011841, "learning_rate": 2.974056578018627e-07, "loss": 0.6812, "step": 12922 }, { "epoch": 0.909750087997184, "grad_norm": 1.8132238388061523, "learning_rate": 2.9694553338788997e-07, "loss": 0.6376, "step": 12923 }, { "epoch": 0.9098204857444562, "grad_norm": 2.3785531520843506, "learning_rate": 2.964857574717882e-07, "loss": 0.7322, "step": 12924 }, { "epoch": 0.9098908834917283, "grad_norm": 1.9371064901351929, "learning_rate": 2.9602633007746335e-07, "loss": 0.6114, "step": 12925 }, { "epoch": 0.9099612812390003, "grad_norm": 2.2457175254821777, "learning_rate": 2.9556725122880147e-07, "loss": 0.7938, "step": 12926 }, { "epoch": 0.9100316789862725, "grad_norm": 1.915107011795044, "learning_rate": 2.9510852094967045e-07, "loss": 0.65, "step": 12927 }, { "epoch": 0.9101020767335445, "grad_norm": 1.6298257112503052, "learning_rate": 2.9465013926391893e-07, "loss": 0.6213, "step": 12928 }, { "epoch": 0.9101724744808166, "grad_norm": 2.3069403171539307, "learning_rate": 2.9419210619537993e-07, "loss": 0.7225, "step": 12929 }, { "epoch": 0.9102428722280888, "grad_norm": 3.0743777751922607, "learning_rate": 2.9373442176786556e-07, "loss": 0.6973, "step": 12930 }, { "epoch": 0.9103132699753608, "grad_norm": 2.3194031715393066, "learning_rate": 2.932770860051731e-07, "loss": 0.6929, "step": 12931 }, { "epoch": 0.9103836677226329, "grad_norm": 1.6122033596038818, "learning_rate": 2.928200989310789e-07, "loss": 0.6249, "step": 12932 }, { "epoch": 0.9104540654699049, "grad_norm": 2.3794121742248535, "learning_rate": 2.9236346056934205e-07, "loss": 0.703, "step": 12933 }, { "epoch": 0.9105244632171771, "grad_norm": 1.8181971311569214, "learning_rate": 2.9190717094370246e-07, "loss": 0.6488, "step": 12934 }, { "epoch": 0.9105948609644492, "grad_norm": 1.5821506977081299, "learning_rate": 2.914512300778859e-07, "loss": 0.6288, "step": 12935 }, { "epoch": 0.9106652587117212, "grad_norm": 2.1430742740631104, "learning_rate": 2.90995637995594e-07, "loss": 0.6411, "step": 12936 }, { "epoch": 0.9107356564589933, "grad_norm": 1.9010369777679443, "learning_rate": 2.905403947205162e-07, "loss": 0.6514, "step": 12937 }, { "epoch": 0.9108060542062654, "grad_norm": 2.226746082305908, "learning_rate": 2.9008550027631984e-07, "loss": 0.587, "step": 12938 }, { "epoch": 0.9108764519535375, "grad_norm": 2.738449811935425, "learning_rate": 2.8963095468665467e-07, "loss": 0.5993, "step": 12939 }, { "epoch": 0.9109468497008095, "grad_norm": 2.043180465698242, "learning_rate": 2.891767579751532e-07, "loss": 0.5962, "step": 12940 }, { "epoch": 0.9110172474480817, "grad_norm": 1.6848148107528687, "learning_rate": 2.8872291016543006e-07, "loss": 0.7046, "step": 12941 }, { "epoch": 0.9110876451953538, "grad_norm": 2.207163095474243, "learning_rate": 2.8826941128107974e-07, "loss": 0.6559, "step": 12942 }, { "epoch": 0.9111580429426258, "grad_norm": 2.2823832035064697, "learning_rate": 2.878162613456827e-07, "loss": 0.6383, "step": 12943 }, { "epoch": 0.911228440689898, "grad_norm": 1.8808118104934692, "learning_rate": 2.873634603827969e-07, "loss": 0.658, "step": 12944 }, { "epoch": 0.91129883843717, "grad_norm": 2.5022969245910645, "learning_rate": 2.869110084159647e-07, "loss": 0.5719, "step": 12945 }, { "epoch": 0.9113692361844421, "grad_norm": 2.1335158348083496, "learning_rate": 2.8645890546870757e-07, "loss": 0.664, "step": 12946 }, { "epoch": 0.9114396339317142, "grad_norm": 1.650978446006775, "learning_rate": 2.8600715156453284e-07, "loss": 0.5679, "step": 12947 }, { "epoch": 0.9115100316789863, "grad_norm": 2.3181514739990234, "learning_rate": 2.8555574672692616e-07, "loss": 0.6988, "step": 12948 }, { "epoch": 0.9115804294262584, "grad_norm": 2.750030279159546, "learning_rate": 2.8510469097935776e-07, "loss": 0.5633, "step": 12949 }, { "epoch": 0.9116508271735304, "grad_norm": 2.1886115074157715, "learning_rate": 2.846539843452782e-07, "loss": 0.711, "step": 12950 }, { "epoch": 0.9117212249208025, "grad_norm": 2.0369720458984375, "learning_rate": 2.8420362684811883e-07, "loss": 0.7429, "step": 12951 }, { "epoch": 0.9117916226680747, "grad_norm": 1.700015664100647, "learning_rate": 2.837536185112961e-07, "loss": 0.6094, "step": 12952 }, { "epoch": 0.9118620204153467, "grad_norm": 1.8799644708633423, "learning_rate": 2.8330395935820395e-07, "loss": 0.6908, "step": 12953 }, { "epoch": 0.9119324181626188, "grad_norm": 1.662203311920166, "learning_rate": 2.8285464941222314e-07, "loss": 0.7045, "step": 12954 }, { "epoch": 0.9120028159098909, "grad_norm": 2.0265326499938965, "learning_rate": 2.824056886967127e-07, "loss": 0.6533, "step": 12955 }, { "epoch": 0.912073213657163, "grad_norm": 2.2038724422454834, "learning_rate": 2.819570772350144e-07, "loss": 0.6262, "step": 12956 }, { "epoch": 0.912143611404435, "grad_norm": 2.0032784938812256, "learning_rate": 2.815088150504508e-07, "loss": 0.5833, "step": 12957 }, { "epoch": 0.9122140091517071, "grad_norm": 1.745127558708191, "learning_rate": 2.8106090216632873e-07, "loss": 0.6171, "step": 12958 }, { "epoch": 0.9122844068989793, "grad_norm": 2.322532892227173, "learning_rate": 2.806133386059357e-07, "loss": 0.6989, "step": 12959 }, { "epoch": 0.9123548046462513, "grad_norm": 1.5998344421386719, "learning_rate": 2.801661243925406e-07, "loss": 0.6727, "step": 12960 }, { "epoch": 0.9124252023935234, "grad_norm": 2.2188775539398193, "learning_rate": 2.7971925954939513e-07, "loss": 0.7142, "step": 12961 }, { "epoch": 0.9124956001407954, "grad_norm": 2.630490303039551, "learning_rate": 2.792727440997308e-07, "loss": 0.6296, "step": 12962 }, { "epoch": 0.9125659978880676, "grad_norm": 2.212928295135498, "learning_rate": 2.788265780667628e-07, "loss": 0.5461, "step": 12963 }, { "epoch": 0.9126363956353397, "grad_norm": 2.1200106143951416, "learning_rate": 2.783807614736893e-07, "loss": 0.6577, "step": 12964 }, { "epoch": 0.9127067933826117, "grad_norm": 2.518336296081543, "learning_rate": 2.779352943436858e-07, "loss": 0.5986, "step": 12965 }, { "epoch": 0.9127771911298839, "grad_norm": 2.000070095062256, "learning_rate": 2.774901766999155e-07, "loss": 0.721, "step": 12966 }, { "epoch": 0.9128475888771559, "grad_norm": 2.003119945526123, "learning_rate": 2.770454085655197e-07, "loss": 0.5464, "step": 12967 }, { "epoch": 0.912917986624428, "grad_norm": 1.9646438360214233, "learning_rate": 2.766009899636213e-07, "loss": 0.6372, "step": 12968 }, { "epoch": 0.9129883843717002, "grad_norm": 1.8579083681106567, "learning_rate": 2.761569209173257e-07, "loss": 0.6781, "step": 12969 }, { "epoch": 0.9130587821189722, "grad_norm": 2.1277284622192383, "learning_rate": 2.7571320144972155e-07, "loss": 0.5659, "step": 12970 }, { "epoch": 0.9131291798662443, "grad_norm": 2.117549180984497, "learning_rate": 2.7526983158387794e-07, "loss": 0.6738, "step": 12971 }, { "epoch": 0.9131995776135163, "grad_norm": 2.224393367767334, "learning_rate": 2.748268113428461e-07, "loss": 0.6591, "step": 12972 }, { "epoch": 0.9132699753607885, "grad_norm": 1.9128360748291016, "learning_rate": 2.7438414074965945e-07, "loss": 0.583, "step": 12973 }, { "epoch": 0.9133403731080605, "grad_norm": 1.6618742942810059, "learning_rate": 2.7394181982733256e-07, "loss": 0.5807, "step": 12974 }, { "epoch": 0.9134107708553326, "grad_norm": 2.0710880756378174, "learning_rate": 2.734998485988608e-07, "loss": 0.6459, "step": 12975 }, { "epoch": 0.9134811686026048, "grad_norm": 2.95670747756958, "learning_rate": 2.730582270872246e-07, "loss": 0.6601, "step": 12976 }, { "epoch": 0.9135515663498768, "grad_norm": 2.03173565864563, "learning_rate": 2.7261695531538275e-07, "loss": 0.6073, "step": 12977 }, { "epoch": 0.9136219640971489, "grad_norm": 2.3559954166412354, "learning_rate": 2.721760333062793e-07, "loss": 0.6074, "step": 12978 }, { "epoch": 0.9136923618444209, "grad_norm": 1.9245232343673706, "learning_rate": 2.717354610828363e-07, "loss": 0.7437, "step": 12979 }, { "epoch": 0.9137627595916931, "grad_norm": 2.0752909183502197, "learning_rate": 2.712952386679606e-07, "loss": 0.7155, "step": 12980 }, { "epoch": 0.9138331573389652, "grad_norm": 2.422154426574707, "learning_rate": 2.7085536608453855e-07, "loss": 0.6325, "step": 12981 }, { "epoch": 0.9139035550862372, "grad_norm": 1.7547389268875122, "learning_rate": 2.7041584335544035e-07, "loss": 0.6994, "step": 12982 }, { "epoch": 0.9139739528335094, "grad_norm": 1.8597640991210938, "learning_rate": 2.699766705035175e-07, "loss": 0.5826, "step": 12983 }, { "epoch": 0.9140443505807814, "grad_norm": 2.288832426071167, "learning_rate": 2.6953784755160203e-07, "loss": 0.7322, "step": 12984 }, { "epoch": 0.9141147483280535, "grad_norm": 1.8130297660827637, "learning_rate": 2.6909937452251054e-07, "loss": 0.6785, "step": 12985 }, { "epoch": 0.9141851460753256, "grad_norm": 2.438861608505249, "learning_rate": 2.6866125143903773e-07, "loss": 0.797, "step": 12986 }, { "epoch": 0.9142555438225977, "grad_norm": 2.048807382583618, "learning_rate": 2.682234783239621e-07, "loss": 0.77, "step": 12987 }, { "epoch": 0.9143259415698698, "grad_norm": 1.7300328016281128, "learning_rate": 2.677860552000457e-07, "loss": 0.5437, "step": 12988 }, { "epoch": 0.9143963393171418, "grad_norm": 1.9139983654022217, "learning_rate": 2.673489820900282e-07, "loss": 0.6075, "step": 12989 }, { "epoch": 0.914466737064414, "grad_norm": 1.7831809520721436, "learning_rate": 2.669122590166352e-07, "loss": 0.6383, "step": 12990 }, { "epoch": 0.9145371348116861, "grad_norm": 2.1173999309539795, "learning_rate": 2.66475886002572e-07, "loss": 0.6657, "step": 12991 }, { "epoch": 0.9146075325589581, "grad_norm": 1.9126523733139038, "learning_rate": 2.660398630705254e-07, "loss": 0.6028, "step": 12992 }, { "epoch": 0.9146779303062302, "grad_norm": 2.1471195220947266, "learning_rate": 2.6560419024316426e-07, "loss": 0.6569, "step": 12993 }, { "epoch": 0.9147483280535023, "grad_norm": 2.1342782974243164, "learning_rate": 2.651688675431412e-07, "loss": 0.6435, "step": 12994 }, { "epoch": 0.9148187258007744, "grad_norm": 1.830386996269226, "learning_rate": 2.6473389499308764e-07, "loss": 0.6426, "step": 12995 }, { "epoch": 0.9148891235480464, "grad_norm": 2.2077248096466064, "learning_rate": 2.642992726156189e-07, "loss": 0.5789, "step": 12996 }, { "epoch": 0.9149595212953185, "grad_norm": 1.9610991477966309, "learning_rate": 2.638650004333316e-07, "loss": 0.5147, "step": 12997 }, { "epoch": 0.9150299190425907, "grad_norm": 2.3692550659179688, "learning_rate": 2.6343107846880284e-07, "loss": 0.6526, "step": 12998 }, { "epoch": 0.9151003167898627, "grad_norm": 2.5062198638916016, "learning_rate": 2.6299750674459354e-07, "loss": 0.6737, "step": 12999 }, { "epoch": 0.9151707145371348, "grad_norm": 2.4291772842407227, "learning_rate": 2.6256428528324424e-07, "loss": 0.6893, "step": 13000 }, { "epoch": 0.9152411122844069, "grad_norm": 1.9411602020263672, "learning_rate": 2.6213141410728093e-07, "loss": 0.6362, "step": 13001 }, { "epoch": 0.915311510031679, "grad_norm": 1.9173519611358643, "learning_rate": 2.616988932392068e-07, "loss": 0.6198, "step": 13002 }, { "epoch": 0.9153819077789511, "grad_norm": 1.8591630458831787, "learning_rate": 2.612667227015113e-07, "loss": 0.7029, "step": 13003 }, { "epoch": 0.9154523055262231, "grad_norm": 1.89719557762146, "learning_rate": 2.6083490251666043e-07, "loss": 0.6149, "step": 13004 }, { "epoch": 0.9155227032734953, "grad_norm": 1.7636524438858032, "learning_rate": 2.6040343270710695e-07, "loss": 0.5997, "step": 13005 }, { "epoch": 0.9155931010207673, "grad_norm": 1.9375889301300049, "learning_rate": 2.5997231329528203e-07, "loss": 0.653, "step": 13006 }, { "epoch": 0.9156634987680394, "grad_norm": 2.4724762439727783, "learning_rate": 2.595415443036011e-07, "loss": 0.6289, "step": 13007 }, { "epoch": 0.9157338965153116, "grad_norm": 2.059335231781006, "learning_rate": 2.591111257544595e-07, "loss": 0.6466, "step": 13008 }, { "epoch": 0.9158042942625836, "grad_norm": 1.795561671257019, "learning_rate": 2.586810576702354e-07, "loss": 0.6271, "step": 13009 }, { "epoch": 0.9158746920098557, "grad_norm": 1.7821000814437866, "learning_rate": 2.582513400732892e-07, "loss": 0.5178, "step": 13010 }, { "epoch": 0.9159450897571277, "grad_norm": 2.222073554992676, "learning_rate": 2.57821972985961e-07, "loss": 0.6334, "step": 13011 }, { "epoch": 0.9160154875043999, "grad_norm": 1.8639497756958008, "learning_rate": 2.573929564305738e-07, "loss": 0.7132, "step": 13012 }, { "epoch": 0.9160858852516719, "grad_norm": 2.181734085083008, "learning_rate": 2.5696429042943437e-07, "loss": 0.604, "step": 13013 }, { "epoch": 0.916156282998944, "grad_norm": 2.5417721271514893, "learning_rate": 2.565359750048276e-07, "loss": 0.6183, "step": 13014 }, { "epoch": 0.9162266807462162, "grad_norm": 1.8888300657272339, "learning_rate": 2.5610801017902297e-07, "loss": 0.5776, "step": 13015 }, { "epoch": 0.9162970784934882, "grad_norm": 2.186311960220337, "learning_rate": 2.5568039597427114e-07, "loss": 0.5999, "step": 13016 }, { "epoch": 0.9163674762407603, "grad_norm": 2.0323121547698975, "learning_rate": 2.5525313241280356e-07, "loss": 0.6397, "step": 13017 }, { "epoch": 0.9164378739880323, "grad_norm": 2.294940233230591, "learning_rate": 2.5482621951683283e-07, "loss": 0.7057, "step": 13018 }, { "epoch": 0.9165082717353045, "grad_norm": 2.631450653076172, "learning_rate": 2.543996573085568e-07, "loss": 0.6175, "step": 13019 }, { "epoch": 0.9165786694825766, "grad_norm": 1.8565332889556885, "learning_rate": 2.539734458101502e-07, "loss": 0.61, "step": 13020 }, { "epoch": 0.9166490672298486, "grad_norm": 2.0079493522644043, "learning_rate": 2.5354758504377516e-07, "loss": 0.7557, "step": 13021 }, { "epoch": 0.9167194649771208, "grad_norm": 1.9346044063568115, "learning_rate": 2.5312207503157124e-07, "loss": 0.7307, "step": 13022 }, { "epoch": 0.9167898627243928, "grad_norm": 1.817567229270935, "learning_rate": 2.52696915795661e-07, "loss": 0.7192, "step": 13023 }, { "epoch": 0.9168602604716649, "grad_norm": 2.1696627140045166, "learning_rate": 2.5227210735814754e-07, "loss": 0.6555, "step": 13024 }, { "epoch": 0.916930658218937, "grad_norm": 1.7943049669265747, "learning_rate": 2.5184764974111996e-07, "loss": 0.5801, "step": 13025 }, { "epoch": 0.9170010559662091, "grad_norm": 1.8890653848648071, "learning_rate": 2.5142354296664246e-07, "loss": 0.6304, "step": 13026 }, { "epoch": 0.9170714537134812, "grad_norm": 1.8958680629730225, "learning_rate": 2.509997870567685e-07, "loss": 0.6855, "step": 13027 }, { "epoch": 0.9171418514607532, "grad_norm": 2.032759428024292, "learning_rate": 2.5057638203352807e-07, "loss": 0.7247, "step": 13028 }, { "epoch": 0.9172122492080254, "grad_norm": 2.080650568008423, "learning_rate": 2.501533279189341e-07, "loss": 0.557, "step": 13029 }, { "epoch": 0.9172826469552974, "grad_norm": 1.8138928413391113, "learning_rate": 2.497306247349801e-07, "loss": 0.6231, "step": 13030 }, { "epoch": 0.9173530447025695, "grad_norm": 1.764664888381958, "learning_rate": 2.493082725036456e-07, "loss": 0.6258, "step": 13031 }, { "epoch": 0.9174234424498416, "grad_norm": 2.1150505542755127, "learning_rate": 2.488862712468869e-07, "loss": 0.5335, "step": 13032 }, { "epoch": 0.9174938401971137, "grad_norm": 2.2884786128997803, "learning_rate": 2.484646209866461e-07, "loss": 0.6219, "step": 13033 }, { "epoch": 0.9175642379443858, "grad_norm": 1.8056668043136597, "learning_rate": 2.4804332174484453e-07, "loss": 0.7442, "step": 13034 }, { "epoch": 0.9176346356916578, "grad_norm": 1.675096869468689, "learning_rate": 2.476223735433848e-07, "loss": 0.6162, "step": 13035 }, { "epoch": 0.91770503343893, "grad_norm": 2.0619211196899414, "learning_rate": 2.4720177640415306e-07, "loss": 0.5897, "step": 13036 }, { "epoch": 0.9177754311862021, "grad_norm": 2.0062878131866455, "learning_rate": 2.4678153034901705e-07, "loss": 0.539, "step": 13037 }, { "epoch": 0.9178458289334741, "grad_norm": 2.2292983531951904, "learning_rate": 2.463616353998249e-07, "loss": 0.6331, "step": 13038 }, { "epoch": 0.9179162266807462, "grad_norm": 2.533203363418579, "learning_rate": 2.459420915784085e-07, "loss": 0.7321, "step": 13039 }, { "epoch": 0.9179866244280183, "grad_norm": 2.214169979095459, "learning_rate": 2.4552289890657954e-07, "loss": 0.5964, "step": 13040 }, { "epoch": 0.9180570221752904, "grad_norm": 2.030954122543335, "learning_rate": 2.4510405740613175e-07, "loss": 0.6614, "step": 13041 }, { "epoch": 0.9181274199225625, "grad_norm": 2.361471652984619, "learning_rate": 2.446855670988418e-07, "loss": 0.588, "step": 13042 }, { "epoch": 0.9181978176698345, "grad_norm": 2.0715253353118896, "learning_rate": 2.4426742800646703e-07, "loss": 0.7429, "step": 13043 }, { "epoch": 0.9182682154171067, "grad_norm": 2.3849236965179443, "learning_rate": 2.4384964015074684e-07, "loss": 0.6672, "step": 13044 }, { "epoch": 0.9183386131643787, "grad_norm": 1.8552350997924805, "learning_rate": 2.434322035534034e-07, "loss": 0.6067, "step": 13045 }, { "epoch": 0.9184090109116508, "grad_norm": 2.0291528701782227, "learning_rate": 2.430151182361382e-07, "loss": 0.6855, "step": 13046 }, { "epoch": 0.918479408658923, "grad_norm": 2.0626068115234375, "learning_rate": 2.425983842206368e-07, "loss": 0.593, "step": 13047 }, { "epoch": 0.918549806406195, "grad_norm": 2.356649160385132, "learning_rate": 2.4218200152856494e-07, "loss": 0.6326, "step": 13048 }, { "epoch": 0.9186202041534671, "grad_norm": 1.737630009651184, "learning_rate": 2.4176597018157095e-07, "loss": 0.6072, "step": 13049 }, { "epoch": 0.9186906019007391, "grad_norm": 2.207005023956299, "learning_rate": 2.4135029020128475e-07, "loss": 0.6367, "step": 13050 }, { "epoch": 0.9187609996480113, "grad_norm": 2.361814498901367, "learning_rate": 2.4093496160931894e-07, "loss": 0.5959, "step": 13051 }, { "epoch": 0.9188313973952833, "grad_norm": 1.9990813732147217, "learning_rate": 2.405199844272653e-07, "loss": 0.6499, "step": 13052 }, { "epoch": 0.9189017951425554, "grad_norm": 1.9525548219680786, "learning_rate": 2.401053586766992e-07, "loss": 0.6243, "step": 13053 }, { "epoch": 0.9189721928898276, "grad_norm": 2.1572964191436768, "learning_rate": 2.3969108437917754e-07, "loss": 0.6033, "step": 13054 }, { "epoch": 0.9190425906370996, "grad_norm": 2.3277482986450195, "learning_rate": 2.39277161556239e-07, "loss": 0.645, "step": 13055 }, { "epoch": 0.9191129883843717, "grad_norm": 2.484318256378174, "learning_rate": 2.3886359022940395e-07, "loss": 0.6301, "step": 13056 }, { "epoch": 0.9191833861316437, "grad_norm": 2.177945852279663, "learning_rate": 2.384503704201739e-07, "loss": 0.7344, "step": 13057 }, { "epoch": 0.9192537838789159, "grad_norm": 2.267620086669922, "learning_rate": 2.3803750215003338e-07, "loss": 0.6214, "step": 13058 }, { "epoch": 0.919324181626188, "grad_norm": 1.8809915781021118, "learning_rate": 2.3762498544044587e-07, "loss": 0.5785, "step": 13059 }, { "epoch": 0.91939457937346, "grad_norm": 2.6290054321289062, "learning_rate": 2.3721282031286007e-07, "loss": 0.6356, "step": 13060 }, { "epoch": 0.9194649771207322, "grad_norm": 1.8903818130493164, "learning_rate": 2.368010067887045e-07, "loss": 0.6286, "step": 13061 }, { "epoch": 0.9195353748680042, "grad_norm": 2.1426987648010254, "learning_rate": 2.3638954488939068e-07, "loss": 0.6891, "step": 13062 }, { "epoch": 0.9196057726152763, "grad_norm": 2.0700857639312744, "learning_rate": 2.3597843463630897e-07, "loss": 0.7017, "step": 13063 }, { "epoch": 0.9196761703625485, "grad_norm": 1.745529294013977, "learning_rate": 2.3556767605083505e-07, "loss": 0.5734, "step": 13064 }, { "epoch": 0.9197465681098205, "grad_norm": 1.6505508422851562, "learning_rate": 2.3515726915432288e-07, "loss": 0.6678, "step": 13065 }, { "epoch": 0.9198169658570926, "grad_norm": 2.035557746887207, "learning_rate": 2.347472139681116e-07, "loss": 0.6441, "step": 13066 }, { "epoch": 0.9198873636043646, "grad_norm": 1.969401240348816, "learning_rate": 2.3433751051351858e-07, "loss": 0.634, "step": 13067 }, { "epoch": 0.9199577613516368, "grad_norm": 1.529972791671753, "learning_rate": 2.3392815881184724e-07, "loss": 0.5595, "step": 13068 }, { "epoch": 0.9200281590989088, "grad_norm": 1.8371819257736206, "learning_rate": 2.3351915888437768e-07, "loss": 0.6409, "step": 13069 }, { "epoch": 0.9200985568461809, "grad_norm": 3.101969003677368, "learning_rate": 2.3311051075237598e-07, "loss": 0.5358, "step": 13070 }, { "epoch": 0.920168954593453, "grad_norm": 2.170285701751709, "learning_rate": 2.327022144370865e-07, "loss": 0.5833, "step": 13071 }, { "epoch": 0.9202393523407251, "grad_norm": 2.1048662662506104, "learning_rate": 2.32294269959738e-07, "loss": 0.6662, "step": 13072 }, { "epoch": 0.9203097500879972, "grad_norm": 2.122101306915283, "learning_rate": 2.318866773415391e-07, "loss": 0.6606, "step": 13073 }, { "epoch": 0.9203801478352692, "grad_norm": 1.8412833213806152, "learning_rate": 2.3147943660368207e-07, "loss": 0.8097, "step": 13074 }, { "epoch": 0.9204505455825414, "grad_norm": 1.8397619724273682, "learning_rate": 2.3107254776733897e-07, "loss": 0.6999, "step": 13075 }, { "epoch": 0.9205209433298135, "grad_norm": 2.167442560195923, "learning_rate": 2.3066601085366478e-07, "loss": 0.6782, "step": 13076 }, { "epoch": 0.9205913410770855, "grad_norm": 2.13474702835083, "learning_rate": 2.3025982588379423e-07, "loss": 0.7624, "step": 13077 }, { "epoch": 0.9206617388243576, "grad_norm": 2.35141658782959, "learning_rate": 2.2985399287884733e-07, "loss": 0.6351, "step": 13078 }, { "epoch": 0.9207321365716297, "grad_norm": 1.8389403820037842, "learning_rate": 2.2944851185992154e-07, "loss": 0.6104, "step": 13079 }, { "epoch": 0.9208025343189018, "grad_norm": 2.2518162727355957, "learning_rate": 2.2904338284810032e-07, "loss": 0.5764, "step": 13080 }, { "epoch": 0.9208729320661739, "grad_norm": 2.2403578758239746, "learning_rate": 2.2863860586444618e-07, "loss": 0.6247, "step": 13081 }, { "epoch": 0.920943329813446, "grad_norm": 1.8059453964233398, "learning_rate": 2.2823418093000368e-07, "loss": 0.6423, "step": 13082 }, { "epoch": 0.9210137275607181, "grad_norm": 2.5895965099334717, "learning_rate": 2.2783010806579727e-07, "loss": 0.6983, "step": 13083 }, { "epoch": 0.9210841253079901, "grad_norm": 1.9550738334655762, "learning_rate": 2.2742638729283815e-07, "loss": 0.7081, "step": 13084 }, { "epoch": 0.9211545230552622, "grad_norm": 1.832158088684082, "learning_rate": 2.2702301863211416e-07, "loss": 0.6215, "step": 13085 }, { "epoch": 0.9212249208025343, "grad_norm": 1.8998596668243408, "learning_rate": 2.2662000210459764e-07, "loss": 0.6529, "step": 13086 }, { "epoch": 0.9212953185498064, "grad_norm": 1.9926947355270386, "learning_rate": 2.2621733773124153e-07, "loss": 0.6663, "step": 13087 }, { "epoch": 0.9213657162970785, "grad_norm": 1.77881920337677, "learning_rate": 2.258150255329816e-07, "loss": 0.592, "step": 13088 }, { "epoch": 0.9214361140443506, "grad_norm": 2.2827601432800293, "learning_rate": 2.2541306553073192e-07, "loss": 0.6129, "step": 13089 }, { "epoch": 0.9215065117916227, "grad_norm": 1.9593254327774048, "learning_rate": 2.2501145774539333e-07, "loss": 0.7778, "step": 13090 }, { "epoch": 0.9215769095388947, "grad_norm": 3.1792967319488525, "learning_rate": 2.246102021978441e-07, "loss": 0.7215, "step": 13091 }, { "epoch": 0.9216473072861668, "grad_norm": 2.518643856048584, "learning_rate": 2.2420929890894781e-07, "loss": 0.6812, "step": 13092 }, { "epoch": 0.921717705033439, "grad_norm": 2.1951982975006104, "learning_rate": 2.2380874789954618e-07, "loss": 0.5847, "step": 13093 }, { "epoch": 0.921788102780711, "grad_norm": 2.02725887298584, "learning_rate": 2.2340854919046472e-07, "loss": 0.5821, "step": 13094 }, { "epoch": 0.9218585005279831, "grad_norm": 1.802482008934021, "learning_rate": 2.230087028025094e-07, "loss": 0.6779, "step": 13095 }, { "epoch": 0.9219288982752551, "grad_norm": 2.1034321784973145, "learning_rate": 2.2260920875646916e-07, "loss": 0.565, "step": 13096 }, { "epoch": 0.9219992960225273, "grad_norm": 1.8127824068069458, "learning_rate": 2.222100670731143e-07, "loss": 0.5812, "step": 13097 }, { "epoch": 0.9220696937697994, "grad_norm": 1.7618314027786255, "learning_rate": 2.218112777731972e-07, "loss": 0.6715, "step": 13098 }, { "epoch": 0.9221400915170714, "grad_norm": 3.5270285606384277, "learning_rate": 2.2141284087745006e-07, "loss": 0.7049, "step": 13099 }, { "epoch": 0.9222104892643436, "grad_norm": 1.846886396408081, "learning_rate": 2.2101475640658718e-07, "loss": 0.6535, "step": 13100 }, { "epoch": 0.9222808870116156, "grad_norm": 2.216886281967163, "learning_rate": 2.2061702438130814e-07, "loss": 0.6623, "step": 13101 }, { "epoch": 0.9223512847588877, "grad_norm": 1.9087120294570923, "learning_rate": 2.202196448222884e-07, "loss": 0.5886, "step": 13102 }, { "epoch": 0.9224216825061599, "grad_norm": 1.849780797958374, "learning_rate": 2.1982261775019017e-07, "loss": 0.6214, "step": 13103 }, { "epoch": 0.9224920802534319, "grad_norm": 1.9401655197143555, "learning_rate": 2.1942594318565477e-07, "loss": 0.6308, "step": 13104 }, { "epoch": 0.922562478000704, "grad_norm": 1.6900510787963867, "learning_rate": 2.1902962114930634e-07, "loss": 0.5578, "step": 13105 }, { "epoch": 0.922632875747976, "grad_norm": 1.7932088375091553, "learning_rate": 2.186336516617473e-07, "loss": 0.5953, "step": 13106 }, { "epoch": 0.9227032734952482, "grad_norm": 1.894115686416626, "learning_rate": 2.1823803474356683e-07, "loss": 0.6161, "step": 13107 }, { "epoch": 0.9227736712425202, "grad_norm": 1.8502811193466187, "learning_rate": 2.178427704153324e-07, "loss": 0.6336, "step": 13108 }, { "epoch": 0.9228440689897923, "grad_norm": 2.1144769191741943, "learning_rate": 2.1744785869759586e-07, "loss": 0.697, "step": 13109 }, { "epoch": 0.9229144667370645, "grad_norm": 2.0175631046295166, "learning_rate": 2.170532996108866e-07, "loss": 0.5922, "step": 13110 }, { "epoch": 0.9229848644843365, "grad_norm": 1.9735265970230103, "learning_rate": 2.1665909317572074e-07, "loss": 0.6263, "step": 13111 }, { "epoch": 0.9230552622316086, "grad_norm": 2.23647141456604, "learning_rate": 2.1626523941259113e-07, "loss": 0.7162, "step": 13112 }, { "epoch": 0.9231256599788806, "grad_norm": 2.3847899436950684, "learning_rate": 2.1587173834197504e-07, "loss": 0.6563, "step": 13113 }, { "epoch": 0.9231960577261528, "grad_norm": 2.076019048690796, "learning_rate": 2.154785899843319e-07, "loss": 0.6253, "step": 13114 }, { "epoch": 0.9232664554734249, "grad_norm": 2.0171959400177, "learning_rate": 2.1508579436010088e-07, "loss": 0.6213, "step": 13115 }, { "epoch": 0.9233368532206969, "grad_norm": 1.9714202880859375, "learning_rate": 2.1469335148970414e-07, "loss": 0.6281, "step": 13116 }, { "epoch": 0.923407250967969, "grad_norm": 6.8839616775512695, "learning_rate": 2.1430126139354588e-07, "loss": 0.7458, "step": 13117 }, { "epoch": 0.9234776487152411, "grad_norm": 1.990700602531433, "learning_rate": 2.1390952409201013e-07, "loss": 0.629, "step": 13118 }, { "epoch": 0.9235480464625132, "grad_norm": 1.7642958164215088, "learning_rate": 2.1351813960546462e-07, "loss": 0.6794, "step": 13119 }, { "epoch": 0.9236184442097853, "grad_norm": 2.0247766971588135, "learning_rate": 2.1312710795425603e-07, "loss": 0.7207, "step": 13120 }, { "epoch": 0.9236888419570574, "grad_norm": 2.3110640048980713, "learning_rate": 2.1273642915871637e-07, "loss": 0.7323, "step": 13121 }, { "epoch": 0.9237592397043295, "grad_norm": 1.868674397468567, "learning_rate": 2.123461032391558e-07, "loss": 0.4803, "step": 13122 }, { "epoch": 0.9238296374516015, "grad_norm": 1.85226571559906, "learning_rate": 2.1195613021586978e-07, "loss": 0.6783, "step": 13123 }, { "epoch": 0.9239000351988736, "grad_norm": 2.0512852668762207, "learning_rate": 2.1156651010913196e-07, "loss": 0.6131, "step": 13124 }, { "epoch": 0.9239704329461457, "grad_norm": 2.0776469707489014, "learning_rate": 2.111772429391997e-07, "loss": 0.6858, "step": 13125 }, { "epoch": 0.9240408306934178, "grad_norm": 1.8141531944274902, "learning_rate": 2.1078832872630936e-07, "loss": 0.6824, "step": 13126 }, { "epoch": 0.9241112284406899, "grad_norm": 2.018172025680542, "learning_rate": 2.1039976749068413e-07, "loss": 0.6732, "step": 13127 }, { "epoch": 0.924181626187962, "grad_norm": 1.7046018838882446, "learning_rate": 2.1001155925252223e-07, "loss": 0.6056, "step": 13128 }, { "epoch": 0.9242520239352341, "grad_norm": 1.9714709520339966, "learning_rate": 2.0962370403201036e-07, "loss": 0.6377, "step": 13129 }, { "epoch": 0.9243224216825061, "grad_norm": 2.343332529067993, "learning_rate": 2.09236201849311e-07, "loss": 0.5959, "step": 13130 }, { "epoch": 0.9243928194297782, "grad_norm": 2.0396785736083984, "learning_rate": 2.0884905272457276e-07, "loss": 0.6572, "step": 13131 }, { "epoch": 0.9244632171770504, "grad_norm": 2.8825254440307617, "learning_rate": 2.0846225667792084e-07, "loss": 0.6289, "step": 13132 }, { "epoch": 0.9245336149243224, "grad_norm": 2.236971139907837, "learning_rate": 2.0807581372946803e-07, "loss": 0.6821, "step": 13133 }, { "epoch": 0.9246040126715945, "grad_norm": 1.9789252281188965, "learning_rate": 2.0768972389930462e-07, "loss": 0.5636, "step": 13134 }, { "epoch": 0.9246744104188666, "grad_norm": 1.8933284282684326, "learning_rate": 2.0730398720750374e-07, "loss": 0.5631, "step": 13135 }, { "epoch": 0.9247448081661387, "grad_norm": 1.831667423248291, "learning_rate": 2.069186036741215e-07, "loss": 0.6273, "step": 13136 }, { "epoch": 0.9248152059134108, "grad_norm": 1.6159573793411255, "learning_rate": 2.065335733191922e-07, "loss": 0.6211, "step": 13137 }, { "epoch": 0.9248856036606828, "grad_norm": 1.9434174299240112, "learning_rate": 2.0614889616273456e-07, "loss": 0.6423, "step": 13138 }, { "epoch": 0.924956001407955, "grad_norm": 2.4163131713867188, "learning_rate": 2.0576457222474952e-07, "loss": 0.7161, "step": 13139 }, { "epoch": 0.925026399155227, "grad_norm": 2.02070951461792, "learning_rate": 2.0538060152521698e-07, "loss": 0.6317, "step": 13140 }, { "epoch": 0.9250967969024991, "grad_norm": 1.8030856847763062, "learning_rate": 2.0499698408410128e-07, "loss": 0.602, "step": 13141 }, { "epoch": 0.9251671946497712, "grad_norm": 1.9782562255859375, "learning_rate": 2.046137199213458e-07, "loss": 0.6005, "step": 13142 }, { "epoch": 0.9252375923970433, "grad_norm": 2.7762298583984375, "learning_rate": 2.0423080905687762e-07, "loss": 0.781, "step": 13143 }, { "epoch": 0.9253079901443154, "grad_norm": 1.9437885284423828, "learning_rate": 2.0384825151060358e-07, "loss": 0.6406, "step": 13144 }, { "epoch": 0.9253783878915874, "grad_norm": 2.078758716583252, "learning_rate": 2.0346604730241502e-07, "loss": 0.7721, "step": 13145 }, { "epoch": 0.9254487856388596, "grad_norm": 2.0421524047851562, "learning_rate": 2.0308419645218067e-07, "loss": 0.5779, "step": 13146 }, { "epoch": 0.9255191833861316, "grad_norm": 1.8051934242248535, "learning_rate": 2.0270269897975534e-07, "loss": 0.533, "step": 13147 }, { "epoch": 0.9255895811334037, "grad_norm": 2.3806211948394775, "learning_rate": 2.0232155490497283e-07, "loss": 0.7012, "step": 13148 }, { "epoch": 0.9256599788806759, "grad_norm": 1.9553751945495605, "learning_rate": 2.0194076424764905e-07, "loss": 0.5204, "step": 13149 }, { "epoch": 0.9257303766279479, "grad_norm": 1.771055817604065, "learning_rate": 2.015603270275813e-07, "loss": 0.5484, "step": 13150 }, { "epoch": 0.92580077437522, "grad_norm": 1.739923119544983, "learning_rate": 2.0118024326454897e-07, "loss": 0.6193, "step": 13151 }, { "epoch": 0.925871172122492, "grad_norm": 2.3301503658294678, "learning_rate": 2.0080051297831358e-07, "loss": 0.626, "step": 13152 }, { "epoch": 0.9259415698697642, "grad_norm": 1.8840514421463013, "learning_rate": 2.0042113618861723e-07, "loss": 0.6081, "step": 13153 }, { "epoch": 0.9260119676170363, "grad_norm": 2.526224374771118, "learning_rate": 2.0004211291518415e-07, "loss": 0.7377, "step": 13154 }, { "epoch": 0.9260823653643083, "grad_norm": 1.610671877861023, "learning_rate": 1.996634431777191e-07, "loss": 0.6821, "step": 13155 }, { "epoch": 0.9261527631115805, "grad_norm": 2.4639649391174316, "learning_rate": 1.9928512699591138e-07, "loss": 0.5744, "step": 13156 }, { "epoch": 0.9262231608588525, "grad_norm": 2.242016553878784, "learning_rate": 1.9890716438942846e-07, "loss": 0.7302, "step": 13157 }, { "epoch": 0.9262935586061246, "grad_norm": 2.2868804931640625, "learning_rate": 1.985295553779215e-07, "loss": 0.5721, "step": 13158 }, { "epoch": 0.9263639563533967, "grad_norm": 2.2068183422088623, "learning_rate": 1.9815229998102378e-07, "loss": 0.6501, "step": 13159 }, { "epoch": 0.9264343541006688, "grad_norm": 2.1537744998931885, "learning_rate": 1.9777539821834767e-07, "loss": 0.5598, "step": 13160 }, { "epoch": 0.9265047518479409, "grad_norm": 2.4405174255371094, "learning_rate": 1.9739885010948755e-07, "loss": 0.5704, "step": 13161 }, { "epoch": 0.9265751495952129, "grad_norm": 2.5955545902252197, "learning_rate": 1.9702265567402387e-07, "loss": 0.6337, "step": 13162 }, { "epoch": 0.926645547342485, "grad_norm": 2.023045539855957, "learning_rate": 1.9664681493151227e-07, "loss": 0.6526, "step": 13163 }, { "epoch": 0.9267159450897571, "grad_norm": 1.918657898902893, "learning_rate": 1.9627132790149504e-07, "loss": 0.5941, "step": 13164 }, { "epoch": 0.9267863428370292, "grad_norm": 2.1270759105682373, "learning_rate": 1.9589619460349283e-07, "loss": 0.6627, "step": 13165 }, { "epoch": 0.9268567405843013, "grad_norm": 1.8145078420639038, "learning_rate": 1.9552141505700991e-07, "loss": 0.62, "step": 13166 }, { "epoch": 0.9269271383315734, "grad_norm": 1.891083836555481, "learning_rate": 1.9514698928153118e-07, "loss": 0.7705, "step": 13167 }, { "epoch": 0.9269975360788455, "grad_norm": 1.9336978197097778, "learning_rate": 1.947729172965228e-07, "loss": 0.7599, "step": 13168 }, { "epoch": 0.9270679338261175, "grad_norm": 1.8618131875991821, "learning_rate": 1.9439919912143387e-07, "loss": 0.6633, "step": 13169 }, { "epoch": 0.9271383315733897, "grad_norm": 1.6706620454788208, "learning_rate": 1.9402583477569414e-07, "loss": 0.6003, "step": 13170 }, { "epoch": 0.9272087293206618, "grad_norm": 1.96750807762146, "learning_rate": 1.9365282427871533e-07, "loss": 0.707, "step": 13171 }, { "epoch": 0.9272791270679338, "grad_norm": 1.9514906406402588, "learning_rate": 1.9328016764989065e-07, "loss": 0.672, "step": 13172 }, { "epoch": 0.9273495248152059, "grad_norm": 1.8673120737075806, "learning_rate": 1.929078649085938e-07, "loss": 0.6343, "step": 13173 }, { "epoch": 0.927419922562478, "grad_norm": 2.5881171226501465, "learning_rate": 1.9253591607418218e-07, "loss": 0.6162, "step": 13174 }, { "epoch": 0.9274903203097501, "grad_norm": 2.1870265007019043, "learning_rate": 1.9216432116599302e-07, "loss": 0.6779, "step": 13175 }, { "epoch": 0.9275607180570222, "grad_norm": 2.2599480152130127, "learning_rate": 1.9179308020334717e-07, "loss": 0.6097, "step": 13176 }, { "epoch": 0.9276311158042942, "grad_norm": 1.6916089057922363, "learning_rate": 1.9142219320554532e-07, "loss": 0.7066, "step": 13177 }, { "epoch": 0.9277015135515664, "grad_norm": 2.9274792671203613, "learning_rate": 1.9105166019186947e-07, "loss": 0.7127, "step": 13178 }, { "epoch": 0.9277719112988384, "grad_norm": 2.3721227645874023, "learning_rate": 1.9068148118158302e-07, "loss": 0.6535, "step": 13179 }, { "epoch": 0.9278423090461105, "grad_norm": 2.075751543045044, "learning_rate": 1.9031165619393453e-07, "loss": 0.732, "step": 13180 }, { "epoch": 0.9279127067933826, "grad_norm": 2.7561581134796143, "learning_rate": 1.8994218524815008e-07, "loss": 0.601, "step": 13181 }, { "epoch": 0.9279831045406547, "grad_norm": 2.2195985317230225, "learning_rate": 1.8957306836343868e-07, "loss": 0.784, "step": 13182 }, { "epoch": 0.9280535022879268, "grad_norm": 2.0215773582458496, "learning_rate": 1.8920430555899214e-07, "loss": 0.6118, "step": 13183 }, { "epoch": 0.9281239000351988, "grad_norm": 1.7946466207504272, "learning_rate": 1.8883589685398137e-07, "loss": 0.6397, "step": 13184 }, { "epoch": 0.928194297782471, "grad_norm": 2.0051631927490234, "learning_rate": 1.8846784226756098e-07, "loss": 0.5802, "step": 13185 }, { "epoch": 0.928264695529743, "grad_norm": 1.8493760824203491, "learning_rate": 1.8810014181886604e-07, "loss": 0.6972, "step": 13186 }, { "epoch": 0.9283350932770151, "grad_norm": 1.9925554990768433, "learning_rate": 1.8773279552701382e-07, "loss": 0.6373, "step": 13187 }, { "epoch": 0.9284054910242873, "grad_norm": 1.9798104763031006, "learning_rate": 1.8736580341110375e-07, "loss": 0.6693, "step": 13188 }, { "epoch": 0.9284758887715593, "grad_norm": 2.58467435836792, "learning_rate": 1.8699916549021577e-07, "loss": 0.733, "step": 13189 }, { "epoch": 0.9285462865188314, "grad_norm": 2.282043218612671, "learning_rate": 1.8663288178341042e-07, "loss": 0.517, "step": 13190 }, { "epoch": 0.9286166842661034, "grad_norm": 1.897059679031372, "learning_rate": 1.8626695230973268e-07, "loss": 0.6848, "step": 13191 }, { "epoch": 0.9286870820133756, "grad_norm": 2.2119691371917725, "learning_rate": 1.8590137708820653e-07, "loss": 0.694, "step": 13192 }, { "epoch": 0.9287574797606477, "grad_norm": 2.107862949371338, "learning_rate": 1.855361561378389e-07, "loss": 0.7159, "step": 13193 }, { "epoch": 0.9288278775079197, "grad_norm": 1.9590411186218262, "learning_rate": 1.8517128947761885e-07, "loss": 0.529, "step": 13194 }, { "epoch": 0.9288982752551919, "grad_norm": 1.8786492347717285, "learning_rate": 1.8480677712651515e-07, "loss": 0.5597, "step": 13195 }, { "epoch": 0.9289686730024639, "grad_norm": 2.027925729751587, "learning_rate": 1.844426191034788e-07, "loss": 0.7978, "step": 13196 }, { "epoch": 0.929039070749736, "grad_norm": 2.2738254070281982, "learning_rate": 1.8407881542744364e-07, "loss": 0.6593, "step": 13197 }, { "epoch": 0.9291094684970082, "grad_norm": 1.8106472492218018, "learning_rate": 1.837153661173233e-07, "loss": 0.7261, "step": 13198 }, { "epoch": 0.9291798662442802, "grad_norm": 2.121351957321167, "learning_rate": 1.8335227119201437e-07, "loss": 0.7573, "step": 13199 }, { "epoch": 0.9292502639915523, "grad_norm": 1.849687933921814, "learning_rate": 1.829895306703939e-07, "loss": 0.5338, "step": 13200 }, { "epoch": 0.9293206617388243, "grad_norm": 2.080855131149292, "learning_rate": 1.8262714457132357e-07, "loss": 0.6964, "step": 13201 }, { "epoch": 0.9293910594860965, "grad_norm": 3.83990478515625, "learning_rate": 1.8226511291364e-07, "loss": 0.6795, "step": 13202 }, { "epoch": 0.9294614572333685, "grad_norm": 2.2116787433624268, "learning_rate": 1.8190343571616906e-07, "loss": 0.644, "step": 13203 }, { "epoch": 0.9295318549806406, "grad_norm": 1.9551973342895508, "learning_rate": 1.8154211299771173e-07, "loss": 0.617, "step": 13204 }, { "epoch": 0.9296022527279127, "grad_norm": 2.7091851234436035, "learning_rate": 1.811811447770565e-07, "loss": 0.6156, "step": 13205 }, { "epoch": 0.9296726504751848, "grad_norm": 1.6118403673171997, "learning_rate": 1.8082053107296857e-07, "loss": 0.7251, "step": 13206 }, { "epoch": 0.9297430482224569, "grad_norm": 2.0315260887145996, "learning_rate": 1.8046027190419766e-07, "loss": 0.6883, "step": 13207 }, { "epoch": 0.9298134459697289, "grad_norm": 2.174501895904541, "learning_rate": 1.8010036728947244e-07, "loss": 0.5576, "step": 13208 }, { "epoch": 0.9298838437170011, "grad_norm": 1.9963492155075073, "learning_rate": 1.7974081724750678e-07, "loss": 0.6868, "step": 13209 }, { "epoch": 0.9299542414642732, "grad_norm": 1.8345372676849365, "learning_rate": 1.7938162179699134e-07, "loss": 0.6441, "step": 13210 }, { "epoch": 0.9300246392115452, "grad_norm": 1.867884874343872, "learning_rate": 1.790227809566035e-07, "loss": 0.5886, "step": 13211 }, { "epoch": 0.9300950369588173, "grad_norm": 1.9402841329574585, "learning_rate": 1.786642947449981e-07, "loss": 0.5257, "step": 13212 }, { "epoch": 0.9301654347060894, "grad_norm": 1.792761206626892, "learning_rate": 1.7830616318081527e-07, "loss": 0.7679, "step": 13213 }, { "epoch": 0.9302358324533615, "grad_norm": 1.7937467098236084, "learning_rate": 1.7794838628267096e-07, "loss": 0.5555, "step": 13214 }, { "epoch": 0.9303062302006336, "grad_norm": 1.7643886804580688, "learning_rate": 1.775909640691703e-07, "loss": 0.7003, "step": 13215 }, { "epoch": 0.9303766279479057, "grad_norm": 1.8626103401184082, "learning_rate": 1.7723389655889278e-07, "loss": 0.6716, "step": 13216 }, { "epoch": 0.9304470256951778, "grad_norm": 1.9842243194580078, "learning_rate": 1.768771837704046e-07, "loss": 0.5734, "step": 13217 }, { "epoch": 0.9305174234424498, "grad_norm": 1.9420018196105957, "learning_rate": 1.7652082572225035e-07, "loss": 0.58, "step": 13218 }, { "epoch": 0.9305878211897219, "grad_norm": 3.357947826385498, "learning_rate": 1.7616482243295973e-07, "loss": 0.7061, "step": 13219 }, { "epoch": 0.930658218936994, "grad_norm": 1.9501343965530396, "learning_rate": 1.7580917392103912e-07, "loss": 0.6434, "step": 13220 }, { "epoch": 0.9307286166842661, "grad_norm": 2.627782106399536, "learning_rate": 1.7545388020497942e-07, "loss": 0.5211, "step": 13221 }, { "epoch": 0.9307990144315382, "grad_norm": 2.27801513671875, "learning_rate": 1.7509894130325365e-07, "loss": 0.7786, "step": 13222 }, { "epoch": 0.9308694121788103, "grad_norm": 1.963484525680542, "learning_rate": 1.7474435723431458e-07, "loss": 0.6494, "step": 13223 }, { "epoch": 0.9309398099260824, "grad_norm": 2.280142068862915, "learning_rate": 1.7439012801659793e-07, "loss": 0.5708, "step": 13224 }, { "epoch": 0.9310102076733544, "grad_norm": 2.0318377017974854, "learning_rate": 1.7403625366851994e-07, "loss": 0.6715, "step": 13225 }, { "epoch": 0.9310806054206265, "grad_norm": 2.0464730262756348, "learning_rate": 1.736827342084798e-07, "loss": 0.6637, "step": 13226 }, { "epoch": 0.9311510031678987, "grad_norm": 1.67006254196167, "learning_rate": 1.7332956965485647e-07, "loss": 0.6125, "step": 13227 }, { "epoch": 0.9312214009151707, "grad_norm": 1.8928942680358887, "learning_rate": 1.7297676002601024e-07, "loss": 0.7139, "step": 13228 }, { "epoch": 0.9312917986624428, "grad_norm": 1.724674940109253, "learning_rate": 1.7262430534028672e-07, "loss": 0.5621, "step": 13229 }, { "epoch": 0.9313621964097148, "grad_norm": 2.3279953002929688, "learning_rate": 1.722722056160073e-07, "loss": 0.728, "step": 13230 }, { "epoch": 0.931432594156987, "grad_norm": 2.1211533546447754, "learning_rate": 1.7192046087148027e-07, "loss": 0.626, "step": 13231 }, { "epoch": 0.9315029919042591, "grad_norm": 1.9122998714447021, "learning_rate": 1.7156907112499285e-07, "loss": 0.6419, "step": 13232 }, { "epoch": 0.9315733896515311, "grad_norm": 2.2430953979492188, "learning_rate": 1.7121803639481293e-07, "loss": 0.6629, "step": 13233 }, { "epoch": 0.9316437873988033, "grad_norm": 2.636220932006836, "learning_rate": 1.7086735669919196e-07, "loss": 0.7573, "step": 13234 }, { "epoch": 0.9317141851460753, "grad_norm": 3.4375717639923096, "learning_rate": 1.7051703205636202e-07, "loss": 0.705, "step": 13235 }, { "epoch": 0.9317845828933474, "grad_norm": 2.4830164909362793, "learning_rate": 1.7016706248453583e-07, "loss": 0.6223, "step": 13236 }, { "epoch": 0.9318549806406194, "grad_norm": 1.575363039970398, "learning_rate": 1.6981744800191123e-07, "loss": 0.5804, "step": 13237 }, { "epoch": 0.9319253783878916, "grad_norm": 1.9362457990646362, "learning_rate": 1.69468188626662e-07, "loss": 0.6281, "step": 13238 }, { "epoch": 0.9319957761351637, "grad_norm": 1.8486586809158325, "learning_rate": 1.691192843769488e-07, "loss": 0.5861, "step": 13239 }, { "epoch": 0.9320661738824357, "grad_norm": 2.1544313430786133, "learning_rate": 1.6877073527090879e-07, "loss": 0.6358, "step": 13240 }, { "epoch": 0.9321365716297079, "grad_norm": 2.045125722885132, "learning_rate": 1.6842254132666612e-07, "loss": 0.5861, "step": 13241 }, { "epoch": 0.9322069693769799, "grad_norm": 2.1938016414642334, "learning_rate": 1.6807470256232226e-07, "loss": 0.5933, "step": 13242 }, { "epoch": 0.932277367124252, "grad_norm": 2.2448441982269287, "learning_rate": 1.6772721899596244e-07, "loss": 0.6991, "step": 13243 }, { "epoch": 0.9323477648715242, "grad_norm": 1.9716392755508423, "learning_rate": 1.6738009064565162e-07, "loss": 0.6295, "step": 13244 }, { "epoch": 0.9324181626187962, "grad_norm": 2.060844659805298, "learning_rate": 1.670333175294385e-07, "loss": 0.5153, "step": 13245 }, { "epoch": 0.9324885603660683, "grad_norm": 1.9421509504318237, "learning_rate": 1.6668689966535078e-07, "loss": 0.726, "step": 13246 }, { "epoch": 0.9325589581133403, "grad_norm": 1.948441743850708, "learning_rate": 1.6634083707140057e-07, "loss": 0.6142, "step": 13247 }, { "epoch": 0.9326293558606125, "grad_norm": 1.7198500633239746, "learning_rate": 1.6599512976557828e-07, "loss": 0.7062, "step": 13248 }, { "epoch": 0.9326997536078846, "grad_norm": 1.7886621952056885, "learning_rate": 1.6564977776585875e-07, "loss": 0.6604, "step": 13249 }, { "epoch": 0.9327701513551566, "grad_norm": 2.6718661785125732, "learning_rate": 1.6530478109019742e-07, "loss": 0.6943, "step": 13250 }, { "epoch": 0.9328405491024288, "grad_norm": 1.9216123819351196, "learning_rate": 1.6496013975652945e-07, "loss": 0.7278, "step": 13251 }, { "epoch": 0.9329109468497008, "grad_norm": 1.9470645189285278, "learning_rate": 1.6461585378277453e-07, "loss": 0.5853, "step": 13252 }, { "epoch": 0.9329813445969729, "grad_norm": 1.722367763519287, "learning_rate": 1.6427192318683137e-07, "loss": 0.6484, "step": 13253 }, { "epoch": 0.933051742344245, "grad_norm": 1.7758417129516602, "learning_rate": 1.6392834798658229e-07, "loss": 0.6274, "step": 13254 }, { "epoch": 0.9331221400915171, "grad_norm": 1.610559105873108, "learning_rate": 1.6358512819988946e-07, "loss": 0.5261, "step": 13255 }, { "epoch": 0.9331925378387892, "grad_norm": 1.7869821786880493, "learning_rate": 1.6324226384459717e-07, "loss": 0.7884, "step": 13256 }, { "epoch": 0.9332629355860612, "grad_norm": 1.656443476676941, "learning_rate": 1.6289975493853026e-07, "loss": 0.5256, "step": 13257 }, { "epoch": 0.9333333333333333, "grad_norm": 2.170328378677368, "learning_rate": 1.6255760149949882e-07, "loss": 0.6366, "step": 13258 }, { "epoch": 0.9334037310806054, "grad_norm": 1.839320182800293, "learning_rate": 1.6221580354528887e-07, "loss": 0.6768, "step": 13259 }, { "epoch": 0.9334741288278775, "grad_norm": 1.6912871599197388, "learning_rate": 1.6187436109367236e-07, "loss": 0.7138, "step": 13260 }, { "epoch": 0.9335445265751496, "grad_norm": 2.146909713745117, "learning_rate": 1.6153327416240116e-07, "loss": 0.4927, "step": 13261 }, { "epoch": 0.9336149243224217, "grad_norm": 1.9304616451263428, "learning_rate": 1.6119254276920837e-07, "loss": 0.6376, "step": 13262 }, { "epoch": 0.9336853220696938, "grad_norm": 1.9058887958526611, "learning_rate": 1.608521669318077e-07, "loss": 0.594, "step": 13263 }, { "epoch": 0.9337557198169658, "grad_norm": 1.781761646270752, "learning_rate": 1.6051214666789813e-07, "loss": 0.5923, "step": 13264 }, { "epoch": 0.933826117564238, "grad_norm": 1.7289245128631592, "learning_rate": 1.6017248199515533e-07, "loss": 0.5824, "step": 13265 }, { "epoch": 0.9338965153115101, "grad_norm": 2.367927074432373, "learning_rate": 1.598331729312401e-07, "loss": 0.6318, "step": 13266 }, { "epoch": 0.9339669130587821, "grad_norm": 1.934836983680725, "learning_rate": 1.5949421949379317e-07, "loss": 0.6204, "step": 13267 }, { "epoch": 0.9340373108060542, "grad_norm": 2.6572108268737793, "learning_rate": 1.5915562170043727e-07, "loss": 0.7278, "step": 13268 }, { "epoch": 0.9341077085533263, "grad_norm": 1.6981714963912964, "learning_rate": 1.5881737956877506e-07, "loss": 0.6244, "step": 13269 }, { "epoch": 0.9341781063005984, "grad_norm": 1.9275212287902832, "learning_rate": 1.5847949311639354e-07, "loss": 0.6607, "step": 13270 }, { "epoch": 0.9342485040478705, "grad_norm": 1.800333023071289, "learning_rate": 1.5814196236085876e-07, "loss": 0.6623, "step": 13271 }, { "epoch": 0.9343189017951425, "grad_norm": 2.1838483810424805, "learning_rate": 1.578047873197197e-07, "loss": 0.7002, "step": 13272 }, { "epoch": 0.9343892995424147, "grad_norm": 2.0434207916259766, "learning_rate": 1.5746796801050745e-07, "loss": 0.6801, "step": 13273 }, { "epoch": 0.9344596972896867, "grad_norm": 2.0047900676727295, "learning_rate": 1.571315044507321e-07, "loss": 0.7772, "step": 13274 }, { "epoch": 0.9345300950369588, "grad_norm": 2.4842631816864014, "learning_rate": 1.567953966578859e-07, "loss": 0.7029, "step": 13275 }, { "epoch": 0.9346004927842309, "grad_norm": 1.7539458274841309, "learning_rate": 1.5645964464944628e-07, "loss": 0.6499, "step": 13276 }, { "epoch": 0.934670890531503, "grad_norm": 2.2985599040985107, "learning_rate": 1.5612424844286588e-07, "loss": 0.6999, "step": 13277 }, { "epoch": 0.9347412882787751, "grad_norm": 1.8434758186340332, "learning_rate": 1.557892080555856e-07, "loss": 0.7079, "step": 13278 }, { "epoch": 0.9348116860260471, "grad_norm": 2.716810703277588, "learning_rate": 1.554545235050223e-07, "loss": 0.6821, "step": 13279 }, { "epoch": 0.9348820837733193, "grad_norm": 2.051342248916626, "learning_rate": 1.551201948085773e-07, "loss": 0.6324, "step": 13280 }, { "epoch": 0.9349524815205913, "grad_norm": 2.02905535697937, "learning_rate": 1.5478622198363168e-07, "loss": 0.6089, "step": 13281 }, { "epoch": 0.9350228792678634, "grad_norm": 1.7197540998458862, "learning_rate": 1.5445260504755098e-07, "loss": 0.6502, "step": 13282 }, { "epoch": 0.9350932770151356, "grad_norm": 2.109349250793457, "learning_rate": 1.5411934401767745e-07, "loss": 0.6388, "step": 13283 }, { "epoch": 0.9351636747624076, "grad_norm": 1.775551676750183, "learning_rate": 1.537864389113409e-07, "loss": 0.5783, "step": 13284 }, { "epoch": 0.9352340725096797, "grad_norm": 2.0438787937164307, "learning_rate": 1.5345388974584707e-07, "loss": 0.6185, "step": 13285 }, { "epoch": 0.9353044702569517, "grad_norm": 1.9314522743225098, "learning_rate": 1.5312169653848605e-07, "loss": 0.7186, "step": 13286 }, { "epoch": 0.9353748680042239, "grad_norm": 1.8685283660888672, "learning_rate": 1.5278985930652865e-07, "loss": 0.7352, "step": 13287 }, { "epoch": 0.935445265751496, "grad_norm": 1.9805548191070557, "learning_rate": 1.5245837806722773e-07, "loss": 0.6185, "step": 13288 }, { "epoch": 0.935515663498768, "grad_norm": 2.463775396347046, "learning_rate": 1.5212725283781668e-07, "loss": 0.7226, "step": 13289 }, { "epoch": 0.9355860612460402, "grad_norm": 1.7719370126724243, "learning_rate": 1.5179648363551267e-07, "loss": 0.5854, "step": 13290 }, { "epoch": 0.9356564589933122, "grad_norm": 1.967349886894226, "learning_rate": 1.5146607047751104e-07, "loss": 0.7254, "step": 13291 }, { "epoch": 0.9357268567405843, "grad_norm": 1.8770520687103271, "learning_rate": 1.5113601338099159e-07, "loss": 0.5775, "step": 13292 }, { "epoch": 0.9357972544878563, "grad_norm": 2.4764773845672607, "learning_rate": 1.5080631236311236e-07, "loss": 0.6579, "step": 13293 }, { "epoch": 0.9358676522351285, "grad_norm": 2.5551488399505615, "learning_rate": 1.504769674410167e-07, "loss": 0.5611, "step": 13294 }, { "epoch": 0.9359380499824006, "grad_norm": 1.8475490808486938, "learning_rate": 1.5014797863182604e-07, "loss": 0.6772, "step": 13295 }, { "epoch": 0.9360084477296726, "grad_norm": 2.067599296569824, "learning_rate": 1.4981934595264568e-07, "loss": 0.5575, "step": 13296 }, { "epoch": 0.9360788454769448, "grad_norm": 2.09792423248291, "learning_rate": 1.494910694205621e-07, "loss": 0.6199, "step": 13297 }, { "epoch": 0.9361492432242168, "grad_norm": 1.8727668523788452, "learning_rate": 1.4916314905264173e-07, "loss": 0.6404, "step": 13298 }, { "epoch": 0.9362196409714889, "grad_norm": 2.1438350677490234, "learning_rate": 1.4883558486593374e-07, "loss": 0.6948, "step": 13299 }, { "epoch": 0.936290038718761, "grad_norm": 1.7700340747833252, "learning_rate": 1.48508376877468e-07, "loss": 0.6396, "step": 13300 }, { "epoch": 0.9363604364660331, "grad_norm": 2.792051315307617, "learning_rate": 1.481815251042572e-07, "loss": 0.6994, "step": 13301 }, { "epoch": 0.9364308342133052, "grad_norm": 2.284693717956543, "learning_rate": 1.4785502956329387e-07, "loss": 0.5517, "step": 13302 }, { "epoch": 0.9365012319605772, "grad_norm": 1.953615427017212, "learning_rate": 1.4752889027155426e-07, "loss": 0.6143, "step": 13303 }, { "epoch": 0.9365716297078494, "grad_norm": 2.183960437774658, "learning_rate": 1.4720310724599273e-07, "loss": 0.6914, "step": 13304 }, { "epoch": 0.9366420274551215, "grad_norm": 1.885040283203125, "learning_rate": 1.468776805035482e-07, "loss": 0.6171, "step": 13305 }, { "epoch": 0.9367124252023935, "grad_norm": 2.572415590286255, "learning_rate": 1.4655261006113938e-07, "loss": 0.5617, "step": 13306 }, { "epoch": 0.9367828229496656, "grad_norm": 2.4824862480163574, "learning_rate": 1.4622789593566787e-07, "loss": 0.6255, "step": 13307 }, { "epoch": 0.9368532206969377, "grad_norm": 2.0509915351867676, "learning_rate": 1.4590353814401423e-07, "loss": 0.6224, "step": 13308 }, { "epoch": 0.9369236184442098, "grad_norm": 2.0128326416015625, "learning_rate": 1.4557953670304436e-07, "loss": 0.7122, "step": 13309 }, { "epoch": 0.9369940161914819, "grad_norm": 1.8022143840789795, "learning_rate": 1.4525589162960072e-07, "loss": 0.5587, "step": 13310 }, { "epoch": 0.937064413938754, "grad_norm": 1.9159172773361206, "learning_rate": 1.449326029405127e-07, "loss": 0.5955, "step": 13311 }, { "epoch": 0.9371348116860261, "grad_norm": 1.9758696556091309, "learning_rate": 1.4460967065258546e-07, "loss": 0.7471, "step": 13312 }, { "epoch": 0.9372052094332981, "grad_norm": 1.606368064880371, "learning_rate": 1.4428709478261103e-07, "loss": 0.5827, "step": 13313 }, { "epoch": 0.9372756071805702, "grad_norm": 2.092729091644287, "learning_rate": 1.439648753473589e-07, "loss": 0.6878, "step": 13314 }, { "epoch": 0.9373460049278423, "grad_norm": 2.2854456901550293, "learning_rate": 1.4364301236358302e-07, "loss": 0.7303, "step": 13315 }, { "epoch": 0.9374164026751144, "grad_norm": 2.153390407562256, "learning_rate": 1.433215058480155e-07, "loss": 0.6701, "step": 13316 }, { "epoch": 0.9374868004223865, "grad_norm": 1.978127360343933, "learning_rate": 1.4300035581737302e-07, "loss": 0.6428, "step": 13317 }, { "epoch": 0.9375571981696585, "grad_norm": 2.17630672454834, "learning_rate": 1.4267956228835122e-07, "loss": 0.6547, "step": 13318 }, { "epoch": 0.9376275959169307, "grad_norm": 1.8432931900024414, "learning_rate": 1.4235912527763094e-07, "loss": 0.6216, "step": 13319 }, { "epoch": 0.9376979936642027, "grad_norm": 1.869399905204773, "learning_rate": 1.42039044801869e-07, "loss": 0.5833, "step": 13320 }, { "epoch": 0.9377683914114748, "grad_norm": 1.9556959867477417, "learning_rate": 1.4171932087770978e-07, "loss": 0.611, "step": 13321 }, { "epoch": 0.937838789158747, "grad_norm": 2.006492853164673, "learning_rate": 1.413999535217727e-07, "loss": 0.6882, "step": 13322 }, { "epoch": 0.937909186906019, "grad_norm": 2.170966625213623, "learning_rate": 1.4108094275066407e-07, "loss": 0.6829, "step": 13323 }, { "epoch": 0.9379795846532911, "grad_norm": 2.059617280960083, "learning_rate": 1.407622885809684e-07, "loss": 0.5758, "step": 13324 }, { "epoch": 0.9380499824005631, "grad_norm": 1.845617651939392, "learning_rate": 1.4044399102925387e-07, "loss": 0.6816, "step": 13325 }, { "epoch": 0.9381203801478353, "grad_norm": 2.020054817199707, "learning_rate": 1.4012605011206846e-07, "loss": 0.6442, "step": 13326 }, { "epoch": 0.9381907778951074, "grad_norm": 2.0616862773895264, "learning_rate": 1.3980846584594387e-07, "loss": 0.635, "step": 13327 }, { "epoch": 0.9382611756423794, "grad_norm": 1.9472640752792358, "learning_rate": 1.394912382473884e-07, "loss": 0.5763, "step": 13328 }, { "epoch": 0.9383315733896516, "grad_norm": 1.9161458015441895, "learning_rate": 1.3917436733289722e-07, "loss": 0.672, "step": 13329 }, { "epoch": 0.9384019711369236, "grad_norm": 2.8704330921173096, "learning_rate": 1.388578531189437e-07, "loss": 0.6403, "step": 13330 }, { "epoch": 0.9384723688841957, "grad_norm": 1.9875990152359009, "learning_rate": 1.3854169562198493e-07, "loss": 0.5648, "step": 13331 }, { "epoch": 0.9385427666314677, "grad_norm": 2.2443625926971436, "learning_rate": 1.3822589485845693e-07, "loss": 0.7358, "step": 13332 }, { "epoch": 0.9386131643787399, "grad_norm": 2.2825517654418945, "learning_rate": 1.3791045084477948e-07, "loss": 0.523, "step": 13333 }, { "epoch": 0.938683562126012, "grad_norm": 1.9903979301452637, "learning_rate": 1.3759536359735292e-07, "loss": 0.6266, "step": 13334 }, { "epoch": 0.938753959873284, "grad_norm": 1.855832815170288, "learning_rate": 1.3728063313255734e-07, "loss": 0.5957, "step": 13335 }, { "epoch": 0.9388243576205562, "grad_norm": 1.8685144186019897, "learning_rate": 1.3696625946675734e-07, "loss": 0.6714, "step": 13336 }, { "epoch": 0.9388947553678282, "grad_norm": 1.9417548179626465, "learning_rate": 1.3665224261629726e-07, "loss": 0.646, "step": 13337 }, { "epoch": 0.9389651531151003, "grad_norm": 2.1069509983062744, "learning_rate": 1.3633858259750208e-07, "loss": 0.6027, "step": 13338 }, { "epoch": 0.9390355508623724, "grad_norm": 1.876248836517334, "learning_rate": 1.3602527942668041e-07, "loss": 0.624, "step": 13339 }, { "epoch": 0.9391059486096445, "grad_norm": 2.1138436794281006, "learning_rate": 1.3571233312012143e-07, "loss": 0.7145, "step": 13340 }, { "epoch": 0.9391763463569166, "grad_norm": 1.8298373222351074, "learning_rate": 1.353997436940949e-07, "loss": 0.6999, "step": 13341 }, { "epoch": 0.9392467441041886, "grad_norm": 2.4271535873413086, "learning_rate": 1.3508751116485196e-07, "loss": 0.6155, "step": 13342 }, { "epoch": 0.9393171418514608, "grad_norm": 2.5883007049560547, "learning_rate": 1.3477563554862736e-07, "loss": 0.5719, "step": 13343 }, { "epoch": 0.9393875395987329, "grad_norm": 2.2408640384674072, "learning_rate": 1.344641168616334e-07, "loss": 0.6879, "step": 13344 }, { "epoch": 0.9394579373460049, "grad_norm": 2.0020039081573486, "learning_rate": 1.3415295512006985e-07, "loss": 0.6111, "step": 13345 }, { "epoch": 0.939528335093277, "grad_norm": 2.008058547973633, "learning_rate": 1.3384215034011094e-07, "loss": 0.6138, "step": 13346 }, { "epoch": 0.9395987328405491, "grad_norm": 1.7638314962387085, "learning_rate": 1.3353170253791758e-07, "loss": 0.5324, "step": 13347 }, { "epoch": 0.9396691305878212, "grad_norm": 2.1924805641174316, "learning_rate": 1.3322161172962898e-07, "loss": 0.4768, "step": 13348 }, { "epoch": 0.9397395283350932, "grad_norm": 2.082374095916748, "learning_rate": 1.3291187793136884e-07, "loss": 0.58, "step": 13349 }, { "epoch": 0.9398099260823654, "grad_norm": 1.7560726404190063, "learning_rate": 1.3260250115923823e-07, "loss": 0.7549, "step": 13350 }, { "epoch": 0.9398803238296375, "grad_norm": 1.7496052980422974, "learning_rate": 1.322934814293235e-07, "loss": 0.6133, "step": 13351 }, { "epoch": 0.9399507215769095, "grad_norm": 2.5254268646240234, "learning_rate": 1.3198481875769085e-07, "loss": 0.6304, "step": 13352 }, { "epoch": 0.9400211193241816, "grad_norm": 1.9248766899108887, "learning_rate": 1.316765131603862e-07, "loss": 0.6336, "step": 13353 }, { "epoch": 0.9400915170714537, "grad_norm": 2.0074350833892822, "learning_rate": 1.313685646534415e-07, "loss": 0.6809, "step": 13354 }, { "epoch": 0.9401619148187258, "grad_norm": 1.9991869926452637, "learning_rate": 1.3106097325286458e-07, "loss": 0.7303, "step": 13355 }, { "epoch": 0.9402323125659979, "grad_norm": 2.7196602821350098, "learning_rate": 1.3075373897464938e-07, "loss": 0.6888, "step": 13356 }, { "epoch": 0.94030271031327, "grad_norm": 1.6328494548797607, "learning_rate": 1.3044686183476794e-07, "loss": 0.5732, "step": 13357 }, { "epoch": 0.9403731080605421, "grad_norm": 2.0037388801574707, "learning_rate": 1.3014034184917612e-07, "loss": 0.6235, "step": 13358 }, { "epoch": 0.9404435058078141, "grad_norm": 2.0218405723571777, "learning_rate": 1.2983417903380867e-07, "loss": 0.6314, "step": 13359 }, { "epoch": 0.9405139035550862, "grad_norm": 1.667686104774475, "learning_rate": 1.295283734045849e-07, "loss": 0.7063, "step": 13360 }, { "epoch": 0.9405843013023584, "grad_norm": 2.0363080501556396, "learning_rate": 1.2922292497740305e-07, "loss": 0.5657, "step": 13361 }, { "epoch": 0.9406546990496304, "grad_norm": 2.6451399326324463, "learning_rate": 1.2891783376814437e-07, "loss": 0.5946, "step": 13362 }, { "epoch": 0.9407250967969025, "grad_norm": 1.9126780033111572, "learning_rate": 1.2861309979266977e-07, "loss": 0.6181, "step": 13363 }, { "epoch": 0.9407954945441745, "grad_norm": 2.5435376167297363, "learning_rate": 1.2830872306682395e-07, "loss": 0.7088, "step": 13364 }, { "epoch": 0.9408658922914467, "grad_norm": 1.936307668685913, "learning_rate": 1.280047036064298e-07, "loss": 0.6383, "step": 13365 }, { "epoch": 0.9409362900387188, "grad_norm": 2.2764346599578857, "learning_rate": 1.277010414272962e-07, "loss": 0.5562, "step": 13366 }, { "epoch": 0.9410066877859908, "grad_norm": 2.0958378314971924, "learning_rate": 1.2739773654520804e-07, "loss": 0.7162, "step": 13367 }, { "epoch": 0.941077085533263, "grad_norm": 2.5358734130859375, "learning_rate": 1.2709478897593607e-07, "loss": 0.7169, "step": 13368 }, { "epoch": 0.941147483280535, "grad_norm": 2.028324842453003, "learning_rate": 1.2679219873523094e-07, "loss": 0.649, "step": 13369 }, { "epoch": 0.9412178810278071, "grad_norm": 2.199768543243408, "learning_rate": 1.2648996583882466e-07, "loss": 0.7162, "step": 13370 }, { "epoch": 0.9412882787750791, "grad_norm": 1.8420740365982056, "learning_rate": 1.2618809030242893e-07, "loss": 0.5834, "step": 13371 }, { "epoch": 0.9413586765223513, "grad_norm": 1.8969570398330688, "learning_rate": 1.2588657214174004e-07, "loss": 0.7387, "step": 13372 }, { "epoch": 0.9414290742696234, "grad_norm": 1.9307193756103516, "learning_rate": 1.2558541137243317e-07, "loss": 0.6654, "step": 13373 }, { "epoch": 0.9414994720168954, "grad_norm": 1.999553918838501, "learning_rate": 1.2528460801016806e-07, "loss": 0.574, "step": 13374 }, { "epoch": 0.9415698697641676, "grad_norm": 1.8283189535140991, "learning_rate": 1.2498416207058183e-07, "loss": 0.6844, "step": 13375 }, { "epoch": 0.9416402675114396, "grad_norm": 1.9879765510559082, "learning_rate": 1.2468407356929535e-07, "loss": 0.5697, "step": 13376 }, { "epoch": 0.9417106652587117, "grad_norm": 1.7956700325012207, "learning_rate": 1.243843425219092e-07, "loss": 0.6978, "step": 13377 }, { "epoch": 0.9417810630059839, "grad_norm": 1.8093096017837524, "learning_rate": 1.2408496894400933e-07, "loss": 0.7176, "step": 13378 }, { "epoch": 0.9418514607532559, "grad_norm": 1.9028016328811646, "learning_rate": 1.23785952851159e-07, "loss": 0.692, "step": 13379 }, { "epoch": 0.941921858500528, "grad_norm": 1.720513939857483, "learning_rate": 1.2348729425890447e-07, "loss": 0.6549, "step": 13380 }, { "epoch": 0.9419922562478, "grad_norm": 2.0097224712371826, "learning_rate": 1.2318899318277333e-07, "loss": 0.7064, "step": 13381 }, { "epoch": 0.9420626539950722, "grad_norm": 2.683281898498535, "learning_rate": 1.2289104963827446e-07, "loss": 0.7585, "step": 13382 }, { "epoch": 0.9421330517423443, "grad_norm": 2.4127514362335205, "learning_rate": 1.225934636408974e-07, "loss": 0.6857, "step": 13383 }, { "epoch": 0.9422034494896163, "grad_norm": 2.459670066833496, "learning_rate": 1.222962352061153e-07, "loss": 0.6954, "step": 13384 }, { "epoch": 0.9422738472368885, "grad_norm": 1.7655326128005981, "learning_rate": 1.2199936434938042e-07, "loss": 0.6531, "step": 13385 }, { "epoch": 0.9423442449841605, "grad_norm": 1.9743541479110718, "learning_rate": 1.2170285108612854e-07, "loss": 0.6366, "step": 13386 }, { "epoch": 0.9424146427314326, "grad_norm": 1.840343713760376, "learning_rate": 1.2140669543177386e-07, "loss": 0.5885, "step": 13387 }, { "epoch": 0.9424850404787046, "grad_norm": 2.410935878753662, "learning_rate": 1.2111089740171565e-07, "loss": 0.8034, "step": 13388 }, { "epoch": 0.9425554382259768, "grad_norm": 2.0432605743408203, "learning_rate": 1.208154570113308e-07, "loss": 0.6208, "step": 13389 }, { "epoch": 0.9426258359732489, "grad_norm": 3.531604766845703, "learning_rate": 1.2052037427598128e-07, "loss": 0.5343, "step": 13390 }, { "epoch": 0.9426962337205209, "grad_norm": 2.132019281387329, "learning_rate": 1.2022564921100663e-07, "loss": 0.753, "step": 13391 }, { "epoch": 0.942766631467793, "grad_norm": 1.8770201206207275, "learning_rate": 1.199312818317324e-07, "loss": 0.6844, "step": 13392 }, { "epoch": 0.9428370292150651, "grad_norm": 2.0440452098846436, "learning_rate": 1.1963727215346155e-07, "loss": 0.616, "step": 13393 }, { "epoch": 0.9429074269623372, "grad_norm": 1.9908077716827393, "learning_rate": 1.193436201914807e-07, "loss": 0.7426, "step": 13394 }, { "epoch": 0.9429778247096093, "grad_norm": 2.350351095199585, "learning_rate": 1.1905032596105558e-07, "loss": 0.6889, "step": 13395 }, { "epoch": 0.9430482224568814, "grad_norm": 2.683887243270874, "learning_rate": 1.187573894774363e-07, "loss": 0.6248, "step": 13396 }, { "epoch": 0.9431186202041535, "grad_norm": 2.1175897121429443, "learning_rate": 1.1846481075585125e-07, "loss": 0.6527, "step": 13397 }, { "epoch": 0.9431890179514255, "grad_norm": 1.7565255165100098, "learning_rate": 1.1817258981151401e-07, "loss": 0.5528, "step": 13398 }, { "epoch": 0.9432594156986976, "grad_norm": 3.0567774772644043, "learning_rate": 1.1788072665961646e-07, "loss": 0.5473, "step": 13399 }, { "epoch": 0.9433298134459698, "grad_norm": 1.5908703804016113, "learning_rate": 1.1758922131533334e-07, "loss": 0.6722, "step": 13400 }, { "epoch": 0.9434002111932418, "grad_norm": 2.1995723247528076, "learning_rate": 1.172980737938184e-07, "loss": 0.5862, "step": 13401 }, { "epoch": 0.9434706089405139, "grad_norm": 2.1486356258392334, "learning_rate": 1.1700728411020989e-07, "loss": 0.6992, "step": 13402 }, { "epoch": 0.943541006687786, "grad_norm": 3.505035877227783, "learning_rate": 1.1671685227962658e-07, "loss": 0.6592, "step": 13403 }, { "epoch": 0.9436114044350581, "grad_norm": 2.2411534786224365, "learning_rate": 1.1642677831716863e-07, "loss": 0.6456, "step": 13404 }, { "epoch": 0.9436818021823302, "grad_norm": 1.7767131328582764, "learning_rate": 1.1613706223791675e-07, "loss": 0.7135, "step": 13405 }, { "epoch": 0.9437521999296022, "grad_norm": 1.7237287759780884, "learning_rate": 1.1584770405693223e-07, "loss": 0.6692, "step": 13406 }, { "epoch": 0.9438225976768744, "grad_norm": 2.2404141426086426, "learning_rate": 1.155587037892608e-07, "loss": 0.6509, "step": 13407 }, { "epoch": 0.9438929954241464, "grad_norm": 1.940363883972168, "learning_rate": 1.1527006144992723e-07, "loss": 0.7703, "step": 13408 }, { "epoch": 0.9439633931714185, "grad_norm": 2.470426321029663, "learning_rate": 1.1498177705393841e-07, "loss": 0.5592, "step": 13409 }, { "epoch": 0.9440337909186906, "grad_norm": 2.131884813308716, "learning_rate": 1.1469385061628179e-07, "loss": 0.6829, "step": 13410 }, { "epoch": 0.9441041886659627, "grad_norm": 1.9821776151657104, "learning_rate": 1.1440628215192927e-07, "loss": 0.6773, "step": 13411 }, { "epoch": 0.9441745864132348, "grad_norm": 2.196107864379883, "learning_rate": 1.1411907167582869e-07, "loss": 0.6317, "step": 13412 }, { "epoch": 0.9442449841605068, "grad_norm": 1.8204424381256104, "learning_rate": 1.1383221920291464e-07, "loss": 0.6256, "step": 13413 }, { "epoch": 0.944315381907779, "grad_norm": 2.257648229598999, "learning_rate": 1.135457247480992e-07, "loss": 0.6152, "step": 13414 }, { "epoch": 0.944385779655051, "grad_norm": 1.9619024991989136, "learning_rate": 1.1325958832627892e-07, "loss": 0.4953, "step": 13415 }, { "epoch": 0.9444561774023231, "grad_norm": 2.039294481277466, "learning_rate": 1.1297380995232852e-07, "loss": 0.6296, "step": 13416 }, { "epoch": 0.9445265751495953, "grad_norm": 2.3243672847747803, "learning_rate": 1.1268838964110883e-07, "loss": 0.76, "step": 13417 }, { "epoch": 0.9445969728968673, "grad_norm": 1.8297600746154785, "learning_rate": 1.1240332740745574e-07, "loss": 0.5294, "step": 13418 }, { "epoch": 0.9446673706441394, "grad_norm": 2.0596461296081543, "learning_rate": 1.1211862326619193e-07, "loss": 0.7043, "step": 13419 }, { "epoch": 0.9447377683914114, "grad_norm": 1.8427293300628662, "learning_rate": 1.1183427723211914e-07, "loss": 0.6452, "step": 13420 }, { "epoch": 0.9448081661386836, "grad_norm": 2.1977436542510986, "learning_rate": 1.1155028932002042e-07, "loss": 0.6651, "step": 13421 }, { "epoch": 0.9448785638859557, "grad_norm": 1.924061894416809, "learning_rate": 1.1126665954466019e-07, "loss": 0.6985, "step": 13422 }, { "epoch": 0.9449489616332277, "grad_norm": 2.1756746768951416, "learning_rate": 1.1098338792078655e-07, "loss": 0.6686, "step": 13423 }, { "epoch": 0.9450193593804999, "grad_norm": 1.829715609550476, "learning_rate": 1.1070047446312425e-07, "loss": 0.5827, "step": 13424 }, { "epoch": 0.9450897571277719, "grad_norm": 1.9215165376663208, "learning_rate": 1.1041791918638411e-07, "loss": 0.5644, "step": 13425 }, { "epoch": 0.945160154875044, "grad_norm": 2.140357732772827, "learning_rate": 1.1013572210525513e-07, "loss": 0.6462, "step": 13426 }, { "epoch": 0.945230552622316, "grad_norm": 1.9782085418701172, "learning_rate": 1.0985388323441081e-07, "loss": 0.5965, "step": 13427 }, { "epoch": 0.9453009503695882, "grad_norm": 2.385695457458496, "learning_rate": 1.0957240258850287e-07, "loss": 0.6152, "step": 13428 }, { "epoch": 0.9453713481168603, "grad_norm": 1.8895820379257202, "learning_rate": 1.0929128018216671e-07, "loss": 0.6061, "step": 13429 }, { "epoch": 0.9454417458641323, "grad_norm": 1.9299699068069458, "learning_rate": 1.0901051603001677e-07, "loss": 0.7612, "step": 13430 }, { "epoch": 0.9455121436114045, "grad_norm": 1.8086830377578735, "learning_rate": 1.0873011014665113e-07, "loss": 0.6596, "step": 13431 }, { "epoch": 0.9455825413586765, "grad_norm": 1.9652626514434814, "learning_rate": 1.0845006254664769e-07, "loss": 0.6228, "step": 13432 }, { "epoch": 0.9456529391059486, "grad_norm": 2.0700204372406006, "learning_rate": 1.0817037324456724e-07, "loss": 0.7144, "step": 13433 }, { "epoch": 0.9457233368532207, "grad_norm": 2.1408965587615967, "learning_rate": 1.0789104225495038e-07, "loss": 0.6702, "step": 13434 }, { "epoch": 0.9457937346004928, "grad_norm": 2.03265643119812, "learning_rate": 1.0761206959232061e-07, "loss": 0.5931, "step": 13435 }, { "epoch": 0.9458641323477649, "grad_norm": 2.9437503814697266, "learning_rate": 1.073334552711812e-07, "loss": 0.5668, "step": 13436 }, { "epoch": 0.9459345300950369, "grad_norm": 1.9239253997802734, "learning_rate": 1.0705519930601836e-07, "loss": 0.6318, "step": 13437 }, { "epoch": 0.946004927842309, "grad_norm": 2.275963068008423, "learning_rate": 1.0677730171129729e-07, "loss": 0.7199, "step": 13438 }, { "epoch": 0.9460753255895812, "grad_norm": 2.400522232055664, "learning_rate": 1.0649976250146842e-07, "loss": 0.6643, "step": 13439 }, { "epoch": 0.9461457233368532, "grad_norm": 1.7531081438064575, "learning_rate": 1.0622258169095888e-07, "loss": 0.6229, "step": 13440 }, { "epoch": 0.9462161210841253, "grad_norm": 1.947320580482483, "learning_rate": 1.0594575929418104e-07, "loss": 0.6282, "step": 13441 }, { "epoch": 0.9462865188313974, "grad_norm": 2.0972158908843994, "learning_rate": 1.0566929532552705e-07, "loss": 0.5943, "step": 13442 }, { "epoch": 0.9463569165786695, "grad_norm": 2.1381309032440186, "learning_rate": 1.053931897993704e-07, "loss": 0.7212, "step": 13443 }, { "epoch": 0.9464273143259415, "grad_norm": 1.8956172466278076, "learning_rate": 1.0511744273006518e-07, "loss": 0.611, "step": 13444 }, { "epoch": 0.9464977120732136, "grad_norm": 1.8293368816375732, "learning_rate": 1.0484205413194913e-07, "loss": 0.7091, "step": 13445 }, { "epoch": 0.9465681098204858, "grad_norm": 1.7131117582321167, "learning_rate": 1.0456702401933904e-07, "loss": 0.7191, "step": 13446 }, { "epoch": 0.9466385075677578, "grad_norm": 1.8247473239898682, "learning_rate": 1.0429235240653456e-07, "loss": 0.6599, "step": 13447 }, { "epoch": 0.9467089053150299, "grad_norm": 2.294304609298706, "learning_rate": 1.0401803930781595e-07, "loss": 0.7134, "step": 13448 }, { "epoch": 0.946779303062302, "grad_norm": 1.7649935483932495, "learning_rate": 1.0374408473744478e-07, "loss": 0.5601, "step": 13449 }, { "epoch": 0.9468497008095741, "grad_norm": 1.7575172185897827, "learning_rate": 1.0347048870966324e-07, "loss": 0.5715, "step": 13450 }, { "epoch": 0.9469200985568462, "grad_norm": 1.9083542823791504, "learning_rate": 1.0319725123869716e-07, "loss": 0.7381, "step": 13451 }, { "epoch": 0.9469904963041182, "grad_norm": 2.151188850402832, "learning_rate": 1.0292437233875295e-07, "loss": 0.7239, "step": 13452 }, { "epoch": 0.9470608940513904, "grad_norm": 1.6774327754974365, "learning_rate": 1.0265185202401683e-07, "loss": 0.6017, "step": 13453 }, { "epoch": 0.9471312917986624, "grad_norm": 2.2918827533721924, "learning_rate": 1.0237969030865712e-07, "loss": 0.6906, "step": 13454 }, { "epoch": 0.9472016895459345, "grad_norm": 2.1149768829345703, "learning_rate": 1.0210788720682429e-07, "loss": 0.6504, "step": 13455 }, { "epoch": 0.9472720872932067, "grad_norm": 2.3543179035186768, "learning_rate": 1.0183644273264935e-07, "loss": 0.6879, "step": 13456 }, { "epoch": 0.9473424850404787, "grad_norm": 3.0645675659179688, "learning_rate": 1.0156535690024471e-07, "loss": 0.8039, "step": 13457 }, { "epoch": 0.9474128827877508, "grad_norm": 1.8291361331939697, "learning_rate": 1.0129462972370562e-07, "loss": 0.6105, "step": 13458 }, { "epoch": 0.9474832805350228, "grad_norm": 2.110509157180786, "learning_rate": 1.0102426121710639e-07, "loss": 0.603, "step": 13459 }, { "epoch": 0.947553678282295, "grad_norm": 2.33627986907959, "learning_rate": 1.0075425139450422e-07, "loss": 0.6006, "step": 13460 }, { "epoch": 0.9476240760295671, "grad_norm": 2.027860403060913, "learning_rate": 1.0048460026993611e-07, "loss": 0.665, "step": 13461 }, { "epoch": 0.9476944737768391, "grad_norm": 2.1664278507232666, "learning_rate": 1.0021530785742272e-07, "loss": 0.7787, "step": 13462 }, { "epoch": 0.9477648715241113, "grad_norm": 2.110544204711914, "learning_rate": 9.994637417096375e-08, "loss": 0.6048, "step": 13463 }, { "epoch": 0.9478352692713833, "grad_norm": 1.8432906866073608, "learning_rate": 9.967779922454179e-08, "loss": 0.5431, "step": 13464 }, { "epoch": 0.9479056670186554, "grad_norm": 1.8463634252548218, "learning_rate": 9.940958303212155e-08, "loss": 0.7114, "step": 13465 }, { "epoch": 0.9479760647659274, "grad_norm": 2.187211751937866, "learning_rate": 9.9141725607646e-08, "loss": 0.6276, "step": 13466 }, { "epoch": 0.9480464625131996, "grad_norm": 1.8492976427078247, "learning_rate": 9.8874226965041e-08, "loss": 0.6902, "step": 13467 }, { "epoch": 0.9481168602604717, "grad_norm": 2.1779837608337402, "learning_rate": 9.860708711821609e-08, "loss": 0.7217, "step": 13468 }, { "epoch": 0.9481872580077437, "grad_norm": 1.863638997077942, "learning_rate": 9.834030608105826e-08, "loss": 0.5507, "step": 13469 }, { "epoch": 0.9482576557550159, "grad_norm": 1.8404645919799805, "learning_rate": 9.807388386743898e-08, "loss": 0.6783, "step": 13470 }, { "epoch": 0.9483280535022879, "grad_norm": 2.3536882400512695, "learning_rate": 9.780782049120873e-08, "loss": 0.6077, "step": 13471 }, { "epoch": 0.94839845124956, "grad_norm": 1.8249744176864624, "learning_rate": 9.754211596620166e-08, "loss": 0.6344, "step": 13472 }, { "epoch": 0.9484688489968321, "grad_norm": 2.0205838680267334, "learning_rate": 9.727677030623017e-08, "loss": 0.5455, "step": 13473 }, { "epoch": 0.9485392467441042, "grad_norm": 2.186877965927124, "learning_rate": 9.701178352509187e-08, "loss": 0.561, "step": 13474 }, { "epoch": 0.9486096444913763, "grad_norm": 2.459009885787964, "learning_rate": 9.674715563656189e-08, "loss": 0.6331, "step": 13475 }, { "epoch": 0.9486800422386483, "grad_norm": 2.4594810009002686, "learning_rate": 9.648288665439975e-08, "loss": 0.7357, "step": 13476 }, { "epoch": 0.9487504399859205, "grad_norm": 1.593027114868164, "learning_rate": 9.621897659234401e-08, "loss": 0.7009, "step": 13477 }, { "epoch": 0.9488208377331926, "grad_norm": 2.043124198913574, "learning_rate": 9.595542546411695e-08, "loss": 0.7113, "step": 13478 }, { "epoch": 0.9488912354804646, "grad_norm": 1.7053319215774536, "learning_rate": 9.569223328341903e-08, "loss": 0.6996, "step": 13479 }, { "epoch": 0.9489616332277367, "grad_norm": 2.0380146503448486, "learning_rate": 9.542940006393518e-08, "loss": 0.6315, "step": 13480 }, { "epoch": 0.9490320309750088, "grad_norm": 1.8521177768707275, "learning_rate": 9.516692581933017e-08, "loss": 0.6577, "step": 13481 }, { "epoch": 0.9491024287222809, "grad_norm": 2.0218870639801025, "learning_rate": 9.490481056325006e-08, "loss": 0.6741, "step": 13482 }, { "epoch": 0.9491728264695529, "grad_norm": 1.9566231966018677, "learning_rate": 9.464305430932229e-08, "loss": 0.657, "step": 13483 }, { "epoch": 0.949243224216825, "grad_norm": 2.349400281906128, "learning_rate": 9.438165707115642e-08, "loss": 0.6015, "step": 13484 }, { "epoch": 0.9493136219640972, "grad_norm": 3.958007574081421, "learning_rate": 9.412061886234102e-08, "loss": 0.6934, "step": 13485 }, { "epoch": 0.9493840197113692, "grad_norm": 2.712344169616699, "learning_rate": 9.385993969645067e-08, "loss": 0.6356, "step": 13486 }, { "epoch": 0.9494544174586413, "grad_norm": 2.4533417224884033, "learning_rate": 9.359961958703511e-08, "loss": 0.6557, "step": 13487 }, { "epoch": 0.9495248152059134, "grad_norm": 2.0656628608703613, "learning_rate": 9.33396585476316e-08, "loss": 0.6405, "step": 13488 }, { "epoch": 0.9495952129531855, "grad_norm": 1.852553367614746, "learning_rate": 9.308005659175334e-08, "loss": 0.5772, "step": 13489 }, { "epoch": 0.9496656107004576, "grad_norm": 1.814408302307129, "learning_rate": 9.282081373289874e-08, "loss": 0.6601, "step": 13490 }, { "epoch": 0.9497360084477297, "grad_norm": 2.009521007537842, "learning_rate": 9.256192998454448e-08, "loss": 0.6537, "step": 13491 }, { "epoch": 0.9498064061950018, "grad_norm": 1.9993176460266113, "learning_rate": 9.230340536015247e-08, "loss": 0.5729, "step": 13492 }, { "epoch": 0.9498768039422738, "grad_norm": 2.1286978721618652, "learning_rate": 9.204523987316126e-08, "loss": 0.6327, "step": 13493 }, { "epoch": 0.9499472016895459, "grad_norm": 2.102461338043213, "learning_rate": 9.178743353699547e-08, "loss": 0.6518, "step": 13494 }, { "epoch": 0.9500175994368181, "grad_norm": 3.2038493156433105, "learning_rate": 9.152998636505716e-08, "loss": 0.7764, "step": 13495 }, { "epoch": 0.9500879971840901, "grad_norm": 2.2323856353759766, "learning_rate": 9.127289837073127e-08, "loss": 0.5964, "step": 13496 }, { "epoch": 0.9501583949313622, "grad_norm": 2.464371919631958, "learning_rate": 9.101616956738412e-08, "loss": 0.6192, "step": 13497 }, { "epoch": 0.9502287926786342, "grad_norm": 2.102384567260742, "learning_rate": 9.075979996836336e-08, "loss": 0.7398, "step": 13498 }, { "epoch": 0.9502991904259064, "grad_norm": 2.1495323181152344, "learning_rate": 9.0503789586998e-08, "loss": 0.6304, "step": 13499 }, { "epoch": 0.9503695881731784, "grad_norm": 2.1055006980895996, "learning_rate": 9.02481384365984e-08, "loss": 0.6086, "step": 13500 }, { "epoch": 0.9504399859204505, "grad_norm": 1.6962460279464722, "learning_rate": 8.999284653045625e-08, "loss": 0.6911, "step": 13501 }, { "epoch": 0.9505103836677227, "grad_norm": 1.733275294303894, "learning_rate": 8.973791388184383e-08, "loss": 0.6118, "step": 13502 }, { "epoch": 0.9505807814149947, "grad_norm": 1.9594041109085083, "learning_rate": 8.948334050401552e-08, "loss": 0.6067, "step": 13503 }, { "epoch": 0.9506511791622668, "grad_norm": 2.673973321914673, "learning_rate": 8.922912641020631e-08, "loss": 0.6799, "step": 13504 }, { "epoch": 0.9507215769095388, "grad_norm": 2.197950601577759, "learning_rate": 8.897527161363484e-08, "loss": 0.6227, "step": 13505 }, { "epoch": 0.950791974656811, "grad_norm": 1.811566948890686, "learning_rate": 8.872177612749721e-08, "loss": 0.611, "step": 13506 }, { "epoch": 0.9508623724040831, "grad_norm": 1.947840690612793, "learning_rate": 8.846863996497401e-08, "loss": 0.5917, "step": 13507 }, { "epoch": 0.9509327701513551, "grad_norm": 2.029402017593384, "learning_rate": 8.82158631392248e-08, "loss": 0.6756, "step": 13508 }, { "epoch": 0.9510031678986273, "grad_norm": 2.3874142169952393, "learning_rate": 8.796344566339364e-08, "loss": 0.6737, "step": 13509 }, { "epoch": 0.9510735656458993, "grad_norm": 1.7948827743530273, "learning_rate": 8.771138755060204e-08, "loss": 0.5972, "step": 13510 }, { "epoch": 0.9511439633931714, "grad_norm": 2.2914843559265137, "learning_rate": 8.745968881395594e-08, "loss": 0.6657, "step": 13511 }, { "epoch": 0.9512143611404436, "grad_norm": 2.180859327316284, "learning_rate": 8.72083494665411e-08, "loss": 0.7661, "step": 13512 }, { "epoch": 0.9512847588877156, "grad_norm": 1.9718856811523438, "learning_rate": 8.695736952142541e-08, "loss": 0.7791, "step": 13513 }, { "epoch": 0.9513551566349877, "grad_norm": 2.4089953899383545, "learning_rate": 8.670674899165576e-08, "loss": 0.6239, "step": 13514 }, { "epoch": 0.9514255543822597, "grad_norm": 2.314481496810913, "learning_rate": 8.64564878902635e-08, "loss": 0.6262, "step": 13515 }, { "epoch": 0.9514959521295319, "grad_norm": 1.8290013074874878, "learning_rate": 8.620658623025978e-08, "loss": 0.6519, "step": 13516 }, { "epoch": 0.951566349876804, "grad_norm": 1.5431467294692993, "learning_rate": 8.595704402463711e-08, "loss": 0.576, "step": 13517 }, { "epoch": 0.951636747624076, "grad_norm": 2.2041656970977783, "learning_rate": 8.570786128636931e-08, "loss": 0.5726, "step": 13518 }, { "epoch": 0.9517071453713482, "grad_norm": 1.8178954124450684, "learning_rate": 8.545903802841237e-08, "loss": 0.6285, "step": 13519 }, { "epoch": 0.9517775431186202, "grad_norm": 2.2567670345306396, "learning_rate": 8.521057426370126e-08, "loss": 0.6002, "step": 13520 }, { "epoch": 0.9518479408658923, "grad_norm": 1.675374984741211, "learning_rate": 8.496247000515622e-08, "loss": 0.5838, "step": 13521 }, { "epoch": 0.9519183386131643, "grad_norm": 1.9119999408721924, "learning_rate": 8.471472526567336e-08, "loss": 0.6513, "step": 13522 }, { "epoch": 0.9519887363604365, "grad_norm": 1.8354443311691284, "learning_rate": 8.44673400581356e-08, "loss": 0.5931, "step": 13523 }, { "epoch": 0.9520591341077086, "grad_norm": 2.5288004875183105, "learning_rate": 8.422031439540412e-08, "loss": 0.718, "step": 13524 }, { "epoch": 0.9521295318549806, "grad_norm": 2.2762513160705566, "learning_rate": 8.397364829032217e-08, "loss": 0.5504, "step": 13525 }, { "epoch": 0.9521999296022527, "grad_norm": 1.7784111499786377, "learning_rate": 8.372734175571284e-08, "loss": 0.5967, "step": 13526 }, { "epoch": 0.9522703273495248, "grad_norm": 2.297367811203003, "learning_rate": 8.348139480438288e-08, "loss": 0.7091, "step": 13527 }, { "epoch": 0.9523407250967969, "grad_norm": 1.9913169145584106, "learning_rate": 8.323580744911884e-08, "loss": 0.6787, "step": 13528 }, { "epoch": 0.952411122844069, "grad_norm": 1.8363467454910278, "learning_rate": 8.29905797026902e-08, "loss": 0.6809, "step": 13529 }, { "epoch": 0.9524815205913411, "grad_norm": 2.024702548980713, "learning_rate": 8.274571157784538e-08, "loss": 0.5699, "step": 13530 }, { "epoch": 0.9525519183386132, "grad_norm": 2.0820822715759277, "learning_rate": 8.250120308731578e-08, "loss": 0.6642, "step": 13531 }, { "epoch": 0.9526223160858852, "grad_norm": 2.251187324523926, "learning_rate": 8.225705424381336e-08, "loss": 0.6524, "step": 13532 }, { "epoch": 0.9526927138331573, "grad_norm": 2.0526387691497803, "learning_rate": 8.201326506003215e-08, "loss": 0.7165, "step": 13533 }, { "epoch": 0.9527631115804295, "grad_norm": 1.8201161623001099, "learning_rate": 8.176983554864602e-08, "loss": 0.7284, "step": 13534 }, { "epoch": 0.9528335093277015, "grad_norm": 1.6643059253692627, "learning_rate": 8.152676572231176e-08, "loss": 0.7196, "step": 13535 }, { "epoch": 0.9529039070749736, "grad_norm": 2.2868950366973877, "learning_rate": 8.12840555936667e-08, "loss": 0.6481, "step": 13536 }, { "epoch": 0.9529743048222457, "grad_norm": 2.399549961090088, "learning_rate": 8.104170517533027e-08, "loss": 0.6557, "step": 13537 }, { "epoch": 0.9530447025695178, "grad_norm": 2.4057297706604004, "learning_rate": 8.079971447990019e-08, "loss": 0.5776, "step": 13538 }, { "epoch": 0.9531151003167898, "grad_norm": 2.19360089302063, "learning_rate": 8.055808351996096e-08, "loss": 0.67, "step": 13539 }, { "epoch": 0.9531854980640619, "grad_norm": 1.9187644720077515, "learning_rate": 8.031681230807219e-08, "loss": 0.6529, "step": 13540 }, { "epoch": 0.9532558958113341, "grad_norm": 4.9516167640686035, "learning_rate": 8.007590085678029e-08, "loss": 0.6386, "step": 13541 }, { "epoch": 0.9533262935586061, "grad_norm": 2.395658493041992, "learning_rate": 7.983534917860912e-08, "loss": 0.6582, "step": 13542 }, { "epoch": 0.9533966913058782, "grad_norm": 1.8376082181930542, "learning_rate": 7.95951572860647e-08, "loss": 0.5181, "step": 13543 }, { "epoch": 0.9534670890531503, "grad_norm": 1.9218366146087646, "learning_rate": 7.935532519163668e-08, "loss": 0.6488, "step": 13544 }, { "epoch": 0.9535374868004224, "grad_norm": 1.8384186029434204, "learning_rate": 7.911585290779222e-08, "loss": 0.5468, "step": 13545 }, { "epoch": 0.9536078845476945, "grad_norm": 2.158933162689209, "learning_rate": 7.88767404469829e-08, "loss": 0.6122, "step": 13546 }, { "epoch": 0.9536782822949665, "grad_norm": 2.077000617980957, "learning_rate": 7.86379878216401e-08, "loss": 0.7468, "step": 13547 }, { "epoch": 0.9537486800422387, "grad_norm": 2.0154943466186523, "learning_rate": 7.839959504417581e-08, "loss": 0.6556, "step": 13548 }, { "epoch": 0.9538190777895107, "grad_norm": 2.7230684757232666, "learning_rate": 7.816156212698567e-08, "loss": 0.7211, "step": 13549 }, { "epoch": 0.9538894755367828, "grad_norm": 1.6363073587417603, "learning_rate": 7.792388908244508e-08, "loss": 0.6103, "step": 13550 }, { "epoch": 0.953959873284055, "grad_norm": 2.0410609245300293, "learning_rate": 7.768657592291006e-08, "loss": 0.6816, "step": 13551 }, { "epoch": 0.954030271031327, "grad_norm": 2.1506080627441406, "learning_rate": 7.744962266071953e-08, "loss": 0.7932, "step": 13552 }, { "epoch": 0.9541006687785991, "grad_norm": 2.7225136756896973, "learning_rate": 7.72130293081914e-08, "loss": 0.6829, "step": 13553 }, { "epoch": 0.9541710665258711, "grad_norm": 2.1076903343200684, "learning_rate": 7.697679587762885e-08, "loss": 0.6964, "step": 13554 }, { "epoch": 0.9542414642731433, "grad_norm": 1.9626339673995972, "learning_rate": 7.674092238131247e-08, "loss": 0.7169, "step": 13555 }, { "epoch": 0.9543118620204153, "grad_norm": 1.7695571184158325, "learning_rate": 7.650540883150503e-08, "loss": 0.665, "step": 13556 }, { "epoch": 0.9543822597676874, "grad_norm": 2.1026620864868164, "learning_rate": 7.627025524045217e-08, "loss": 0.6022, "step": 13557 }, { "epoch": 0.9544526575149596, "grad_norm": 2.006113052368164, "learning_rate": 7.603546162037855e-08, "loss": 0.6476, "step": 13558 }, { "epoch": 0.9545230552622316, "grad_norm": 1.9097014665603638, "learning_rate": 7.580102798349253e-08, "loss": 0.5617, "step": 13559 }, { "epoch": 0.9545934530095037, "grad_norm": 1.8966267108917236, "learning_rate": 7.556695434198146e-08, "loss": 0.5206, "step": 13560 }, { "epoch": 0.9546638507567757, "grad_norm": 1.8744064569473267, "learning_rate": 7.533324070801639e-08, "loss": 0.6914, "step": 13561 }, { "epoch": 0.9547342485040479, "grad_norm": 2.0989973545074463, "learning_rate": 7.509988709374737e-08, "loss": 0.7281, "step": 13562 }, { "epoch": 0.95480464625132, "grad_norm": 1.9051287174224854, "learning_rate": 7.486689351130581e-08, "loss": 0.7331, "step": 13563 }, { "epoch": 0.954875043998592, "grad_norm": 2.101846694946289, "learning_rate": 7.463425997280759e-08, "loss": 0.7578, "step": 13564 }, { "epoch": 0.9549454417458642, "grad_norm": 1.9735002517700195, "learning_rate": 7.440198649034524e-08, "loss": 0.7437, "step": 13565 }, { "epoch": 0.9550158394931362, "grad_norm": 1.9298404455184937, "learning_rate": 7.417007307599576e-08, "loss": 0.5696, "step": 13566 }, { "epoch": 0.9550862372404083, "grad_norm": 1.8533270359039307, "learning_rate": 7.393851974181753e-08, "loss": 0.7156, "step": 13567 }, { "epoch": 0.9551566349876804, "grad_norm": 2.2969706058502197, "learning_rate": 7.370732649984713e-08, "loss": 0.6322, "step": 13568 }, { "epoch": 0.9552270327349525, "grad_norm": 1.7792129516601562, "learning_rate": 7.347649336210638e-08, "loss": 0.6077, "step": 13569 }, { "epoch": 0.9552974304822246, "grad_norm": 1.7985377311706543, "learning_rate": 7.324602034059535e-08, "loss": 0.6283, "step": 13570 }, { "epoch": 0.9553678282294966, "grad_norm": 3.7738869190216064, "learning_rate": 7.301590744729702e-08, "loss": 0.6665, "step": 13571 }, { "epoch": 0.9554382259767688, "grad_norm": 1.789225459098816, "learning_rate": 7.278615469417494e-08, "loss": 0.7112, "step": 13572 }, { "epoch": 0.9555086237240409, "grad_norm": 1.7603168487548828, "learning_rate": 7.255676209317474e-08, "loss": 0.6419, "step": 13573 }, { "epoch": 0.9555790214713129, "grad_norm": 1.94046151638031, "learning_rate": 7.232772965622269e-08, "loss": 0.6086, "step": 13574 }, { "epoch": 0.955649419218585, "grad_norm": 5.449466228485107, "learning_rate": 7.20990573952256e-08, "loss": 0.7261, "step": 13575 }, { "epoch": 0.9557198169658571, "grad_norm": 2.30816912651062, "learning_rate": 7.187074532207238e-08, "loss": 0.5006, "step": 13576 }, { "epoch": 0.9557902147131292, "grad_norm": 2.058734178543091, "learning_rate": 7.164279344863412e-08, "loss": 0.7273, "step": 13577 }, { "epoch": 0.9558606124604012, "grad_norm": 1.8945261240005493, "learning_rate": 7.141520178676164e-08, "loss": 0.6206, "step": 13578 }, { "epoch": 0.9559310102076733, "grad_norm": 2.8676421642303467, "learning_rate": 7.118797034828794e-08, "loss": 0.7222, "step": 13579 }, { "epoch": 0.9560014079549455, "grad_norm": 2.3443057537078857, "learning_rate": 7.096109914502657e-08, "loss": 0.5452, "step": 13580 }, { "epoch": 0.9560718057022175, "grad_norm": 1.8776721954345703, "learning_rate": 7.073458818877243e-08, "loss": 0.6766, "step": 13581 }, { "epoch": 0.9561422034494896, "grad_norm": 1.7718335390090942, "learning_rate": 7.050843749130331e-08, "loss": 0.6911, "step": 13582 }, { "epoch": 0.9562126011967617, "grad_norm": 2.2475342750549316, "learning_rate": 7.028264706437526e-08, "loss": 0.621, "step": 13583 }, { "epoch": 0.9562829989440338, "grad_norm": 1.8259342908859253, "learning_rate": 7.005721691972954e-08, "loss": 0.6216, "step": 13584 }, { "epoch": 0.9563533966913059, "grad_norm": 1.8573869466781616, "learning_rate": 6.983214706908491e-08, "loss": 0.4355, "step": 13585 }, { "epoch": 0.956423794438578, "grad_norm": 2.1163718700408936, "learning_rate": 6.9607437524143e-08, "loss": 0.5748, "step": 13586 }, { "epoch": 0.9564941921858501, "grad_norm": 2.0051276683807373, "learning_rate": 6.93830882965868e-08, "loss": 0.5903, "step": 13587 }, { "epoch": 0.9565645899331221, "grad_norm": 1.763647437095642, "learning_rate": 6.915909939808062e-08, "loss": 0.6669, "step": 13588 }, { "epoch": 0.9566349876803942, "grad_norm": 2.1762747764587402, "learning_rate": 6.89354708402694e-08, "loss": 0.5582, "step": 13589 }, { "epoch": 0.9567053854276664, "grad_norm": 2.0491652488708496, "learning_rate": 6.871220263478095e-08, "loss": 0.6248, "step": 13590 }, { "epoch": 0.9567757831749384, "grad_norm": 1.8627161979675293, "learning_rate": 6.848929479322286e-08, "loss": 0.5746, "step": 13591 }, { "epoch": 0.9568461809222105, "grad_norm": 1.8740869760513306, "learning_rate": 6.826674732718329e-08, "loss": 0.6643, "step": 13592 }, { "epoch": 0.9569165786694825, "grad_norm": 1.8977786302566528, "learning_rate": 6.804456024823258e-08, "loss": 0.6286, "step": 13593 }, { "epoch": 0.9569869764167547, "grad_norm": 2.4675509929656982, "learning_rate": 6.78227335679239e-08, "loss": 0.6601, "step": 13594 }, { "epoch": 0.9570573741640267, "grad_norm": 1.7861179113388062, "learning_rate": 6.760126729778948e-08, "loss": 0.6436, "step": 13595 }, { "epoch": 0.9571277719112988, "grad_norm": 2.0821263790130615, "learning_rate": 6.738016144934366e-08, "loss": 0.6429, "step": 13596 }, { "epoch": 0.957198169658571, "grad_norm": 2.348494529724121, "learning_rate": 6.715941603408138e-08, "loss": 0.5882, "step": 13597 }, { "epoch": 0.957268567405843, "grad_norm": 2.142024040222168, "learning_rate": 6.693903106348043e-08, "loss": 0.6135, "step": 13598 }, { "epoch": 0.9573389651531151, "grad_norm": 2.120490789413452, "learning_rate": 6.671900654899687e-08, "loss": 0.7266, "step": 13599 }, { "epoch": 0.9574093629003871, "grad_norm": 2.4888932704925537, "learning_rate": 6.649934250207279e-08, "loss": 0.6363, "step": 13600 }, { "epoch": 0.9574797606476593, "grad_norm": 2.310333728790283, "learning_rate": 6.628003893412615e-08, "loss": 0.6688, "step": 13601 }, { "epoch": 0.9575501583949314, "grad_norm": 1.9564297199249268, "learning_rate": 6.606109585656017e-08, "loss": 0.4714, "step": 13602 }, { "epoch": 0.9576205561422034, "grad_norm": 1.7093617916107178, "learning_rate": 6.584251328075785e-08, "loss": 0.6653, "step": 13603 }, { "epoch": 0.9576909538894756, "grad_norm": 2.188835382461548, "learning_rate": 6.562429121808277e-08, "loss": 0.63, "step": 13604 }, { "epoch": 0.9577613516367476, "grad_norm": 1.8316766023635864, "learning_rate": 6.540642967988142e-08, "loss": 0.598, "step": 13605 }, { "epoch": 0.9578317493840197, "grad_norm": 2.0702500343322754, "learning_rate": 6.518892867748005e-08, "loss": 0.6616, "step": 13606 }, { "epoch": 0.9579021471312918, "grad_norm": 2.1756365299224854, "learning_rate": 6.497178822218707e-08, "loss": 0.6875, "step": 13607 }, { "epoch": 0.9579725448785639, "grad_norm": 2.3575305938720703, "learning_rate": 6.475500832529068e-08, "loss": 0.6875, "step": 13608 }, { "epoch": 0.958042942625836, "grad_norm": 1.8828456401824951, "learning_rate": 6.453858899806352e-08, "loss": 0.6451, "step": 13609 }, { "epoch": 0.958113340373108, "grad_norm": 2.190805435180664, "learning_rate": 6.432253025175494e-08, "loss": 0.6011, "step": 13610 }, { "epoch": 0.9581837381203802, "grad_norm": 1.8467955589294434, "learning_rate": 6.410683209760026e-08, "loss": 0.6955, "step": 13611 }, { "epoch": 0.9582541358676523, "grad_norm": 1.8554730415344238, "learning_rate": 6.389149454681231e-08, "loss": 0.6153, "step": 13612 }, { "epoch": 0.9583245336149243, "grad_norm": 1.9214736223220825, "learning_rate": 6.367651761058757e-08, "loss": 0.626, "step": 13613 }, { "epoch": 0.9583949313621964, "grad_norm": 2.225468873977661, "learning_rate": 6.346190130010232e-08, "loss": 0.696, "step": 13614 }, { "epoch": 0.9584653291094685, "grad_norm": 2.6386213302612305, "learning_rate": 6.324764562651575e-08, "loss": 0.6364, "step": 13615 }, { "epoch": 0.9585357268567406, "grad_norm": 1.8707605600357056, "learning_rate": 6.303375060096527e-08, "loss": 0.6791, "step": 13616 }, { "epoch": 0.9586061246040126, "grad_norm": 3.8989546298980713, "learning_rate": 6.282021623457356e-08, "loss": 0.6301, "step": 13617 }, { "epoch": 0.9586765223512848, "grad_norm": 1.9435675144195557, "learning_rate": 6.260704253844073e-08, "loss": 0.6889, "step": 13618 }, { "epoch": 0.9587469200985569, "grad_norm": 1.8625903129577637, "learning_rate": 6.239422952365058e-08, "loss": 0.7497, "step": 13619 }, { "epoch": 0.9588173178458289, "grad_norm": 2.246880292892456, "learning_rate": 6.218177720126827e-08, "loss": 0.683, "step": 13620 }, { "epoch": 0.958887715593101, "grad_norm": 2.0311625003814697, "learning_rate": 6.196968558233873e-08, "loss": 0.6082, "step": 13621 }, { "epoch": 0.9589581133403731, "grad_norm": 1.8701856136322021, "learning_rate": 6.175795467788748e-08, "loss": 0.6778, "step": 13622 }, { "epoch": 0.9590285110876452, "grad_norm": 1.9871346950531006, "learning_rate": 6.154658449892447e-08, "loss": 0.707, "step": 13623 }, { "epoch": 0.9590989088349173, "grad_norm": 1.9327200651168823, "learning_rate": 6.133557505643871e-08, "loss": 0.6129, "step": 13624 }, { "epoch": 0.9591693065821894, "grad_norm": 1.8003798723220825, "learning_rate": 6.112492636139977e-08, "loss": 0.5667, "step": 13625 }, { "epoch": 0.9592397043294615, "grad_norm": 2.252497911453247, "learning_rate": 6.091463842476009e-08, "loss": 0.6133, "step": 13626 }, { "epoch": 0.9593101020767335, "grad_norm": 1.715781569480896, "learning_rate": 6.070471125745425e-08, "loss": 0.6085, "step": 13627 }, { "epoch": 0.9593804998240056, "grad_norm": 1.992509126663208, "learning_rate": 6.049514487039354e-08, "loss": 0.6454, "step": 13628 }, { "epoch": 0.9594508975712778, "grad_norm": 2.1212854385375977, "learning_rate": 6.028593927447523e-08, "loss": 0.5407, "step": 13629 }, { "epoch": 0.9595212953185498, "grad_norm": 2.5157265663146973, "learning_rate": 6.007709448057563e-08, "loss": 0.7494, "step": 13630 }, { "epoch": 0.9595916930658219, "grad_norm": 1.9883391857147217, "learning_rate": 5.986861049955394e-08, "loss": 0.557, "step": 13631 }, { "epoch": 0.959662090813094, "grad_norm": 2.130262613296509, "learning_rate": 5.966048734224837e-08, "loss": 0.73, "step": 13632 }, { "epoch": 0.9597324885603661, "grad_norm": 2.1431305408477783, "learning_rate": 5.945272501947929e-08, "loss": 0.5935, "step": 13633 }, { "epoch": 0.9598028863076381, "grad_norm": 1.9333453178405762, "learning_rate": 5.9245323542049145e-08, "loss": 0.5559, "step": 13634 }, { "epoch": 0.9598732840549102, "grad_norm": 2.570906639099121, "learning_rate": 5.9038282920740205e-08, "loss": 0.6179, "step": 13635 }, { "epoch": 0.9599436818021824, "grad_norm": 2.0127527713775635, "learning_rate": 5.883160316631686e-08, "loss": 0.6484, "step": 13636 }, { "epoch": 0.9600140795494544, "grad_norm": 2.1900198459625244, "learning_rate": 5.862528428952485e-08, "loss": 0.6746, "step": 13637 }, { "epoch": 0.9600844772967265, "grad_norm": 2.0005452632904053, "learning_rate": 5.8419326301091255e-08, "loss": 0.6448, "step": 13638 }, { "epoch": 0.9601548750439985, "grad_norm": 2.011763095855713, "learning_rate": 5.821372921172374e-08, "loss": 0.6064, "step": 13639 }, { "epoch": 0.9602252727912707, "grad_norm": 1.964816689491272, "learning_rate": 5.800849303211053e-08, "loss": 0.6651, "step": 13640 }, { "epoch": 0.9602956705385428, "grad_norm": 2.1794002056121826, "learning_rate": 5.780361777292275e-08, "loss": 0.7228, "step": 13641 }, { "epoch": 0.9603660682858148, "grad_norm": 2.037421226501465, "learning_rate": 5.7599103444812114e-08, "loss": 0.5911, "step": 13642 }, { "epoch": 0.960436466033087, "grad_norm": 2.1224215030670166, "learning_rate": 5.739495005841244e-08, "loss": 0.6842, "step": 13643 }, { "epoch": 0.960506863780359, "grad_norm": 1.9006543159484863, "learning_rate": 5.7191157624335796e-08, "loss": 0.6786, "step": 13644 }, { "epoch": 0.9605772615276311, "grad_norm": 1.9615905284881592, "learning_rate": 5.698772615317948e-08, "loss": 0.5202, "step": 13645 }, { "epoch": 0.9606476592749033, "grad_norm": 1.8670190572738647, "learning_rate": 5.6784655655519044e-08, "loss": 0.6248, "step": 13646 }, { "epoch": 0.9607180570221753, "grad_norm": 1.9940986633300781, "learning_rate": 5.6581946141912144e-08, "loss": 0.6862, "step": 13647 }, { "epoch": 0.9607884547694474, "grad_norm": 1.7541002035140991, "learning_rate": 5.6379597622898566e-08, "loss": 0.5908, "step": 13648 }, { "epoch": 0.9608588525167194, "grad_norm": 2.1047027111053467, "learning_rate": 5.617761010899791e-08, "loss": 0.6175, "step": 13649 }, { "epoch": 0.9609292502639916, "grad_norm": 2.541874647140503, "learning_rate": 5.5975983610711876e-08, "loss": 0.7125, "step": 13650 }, { "epoch": 0.9609996480112636, "grad_norm": 3.5797765254974365, "learning_rate": 5.577471813852353e-08, "loss": 0.7839, "step": 13651 }, { "epoch": 0.9610700457585357, "grad_norm": 2.0358362197875977, "learning_rate": 5.557381370289727e-08, "loss": 0.56, "step": 13652 }, { "epoch": 0.9611404435058079, "grad_norm": 1.8879426717758179, "learning_rate": 5.537327031427652e-08, "loss": 0.6267, "step": 13653 }, { "epoch": 0.9612108412530799, "grad_norm": 2.4014925956726074, "learning_rate": 5.517308798308995e-08, "loss": 0.7053, "step": 13654 }, { "epoch": 0.961281239000352, "grad_norm": 2.054710865020752, "learning_rate": 5.497326671974367e-08, "loss": 0.6721, "step": 13655 }, { "epoch": 0.961351636747624, "grad_norm": 1.744775414466858, "learning_rate": 5.477380653462671e-08, "loss": 0.7076, "step": 13656 }, { "epoch": 0.9614220344948962, "grad_norm": 2.3048479557037354, "learning_rate": 5.4574707438110216e-08, "loss": 0.7094, "step": 13657 }, { "epoch": 0.9614924322421683, "grad_norm": 1.9648104906082153, "learning_rate": 5.437596944054435e-08, "loss": 0.5633, "step": 13658 }, { "epoch": 0.9615628299894403, "grad_norm": 1.8399486541748047, "learning_rate": 5.417759255226218e-08, "loss": 0.6452, "step": 13659 }, { "epoch": 0.9616332277367124, "grad_norm": 1.984792709350586, "learning_rate": 5.397957678357812e-08, "loss": 0.6495, "step": 13660 }, { "epoch": 0.9617036254839845, "grad_norm": 1.9999053478240967, "learning_rate": 5.378192214478561e-08, "loss": 0.6539, "step": 13661 }, { "epoch": 0.9617740232312566, "grad_norm": 1.8746427297592163, "learning_rate": 5.358462864616253e-08, "loss": 0.7237, "step": 13662 }, { "epoch": 0.9618444209785287, "grad_norm": 2.001176357269287, "learning_rate": 5.3387696297965025e-08, "loss": 0.6083, "step": 13663 }, { "epoch": 0.9619148187258008, "grad_norm": 3.503817081451416, "learning_rate": 5.319112511043289e-08, "loss": 0.7136, "step": 13664 }, { "epoch": 0.9619852164730729, "grad_norm": 1.9593348503112793, "learning_rate": 5.299491509378495e-08, "loss": 0.6674, "step": 13665 }, { "epoch": 0.9620556142203449, "grad_norm": 1.7852369546890259, "learning_rate": 5.279906625822373e-08, "loss": 0.6685, "step": 13666 }, { "epoch": 0.962126011967617, "grad_norm": 1.710307002067566, "learning_rate": 5.260357861393072e-08, "loss": 0.6338, "step": 13667 }, { "epoch": 0.9621964097148892, "grad_norm": 2.1761062145233154, "learning_rate": 5.2408452171069593e-08, "loss": 0.533, "step": 13668 }, { "epoch": 0.9622668074621612, "grad_norm": 1.9399069547653198, "learning_rate": 5.221368693978456e-08, "loss": 0.6902, "step": 13669 }, { "epoch": 0.9623372052094333, "grad_norm": 2.0464670658111572, "learning_rate": 5.201928293020275e-08, "loss": 0.6279, "step": 13670 }, { "epoch": 0.9624076029567054, "grad_norm": 1.9260276556015015, "learning_rate": 5.182524015243028e-08, "loss": 0.6916, "step": 13671 }, { "epoch": 0.9624780007039775, "grad_norm": 2.711745500564575, "learning_rate": 5.1631558616556996e-08, "loss": 0.5955, "step": 13672 }, { "epoch": 0.9625483984512495, "grad_norm": 1.569609522819519, "learning_rate": 5.1438238332651706e-08, "loss": 0.4917, "step": 13673 }, { "epoch": 0.9626187961985216, "grad_norm": 1.781225562095642, "learning_rate": 5.1245279310764615e-08, "loss": 0.6134, "step": 13674 }, { "epoch": 0.9626891939457938, "grad_norm": 2.5091304779052734, "learning_rate": 5.105268156092957e-08, "loss": 0.5715, "step": 13675 }, { "epoch": 0.9627595916930658, "grad_norm": 1.6548396348953247, "learning_rate": 5.086044509315868e-08, "loss": 0.6333, "step": 13676 }, { "epoch": 0.9628299894403379, "grad_norm": 2.183459997177124, "learning_rate": 5.066856991744617e-08, "loss": 0.581, "step": 13677 }, { "epoch": 0.96290038718761, "grad_norm": 2.3391876220703125, "learning_rate": 5.047705604376918e-08, "loss": 0.7035, "step": 13678 }, { "epoch": 0.9629707849348821, "grad_norm": 1.8509852886199951, "learning_rate": 5.028590348208306e-08, "loss": 0.7064, "step": 13679 }, { "epoch": 0.9630411826821542, "grad_norm": 2.7745394706726074, "learning_rate": 5.009511224232765e-08, "loss": 0.6997, "step": 13680 }, { "epoch": 0.9631115804294262, "grad_norm": 2.326376438140869, "learning_rate": 4.99046823344218e-08, "loss": 0.534, "step": 13681 }, { "epoch": 0.9631819781766984, "grad_norm": 1.7370011806488037, "learning_rate": 4.9714613768264916e-08, "loss": 0.5492, "step": 13682 }, { "epoch": 0.9632523759239704, "grad_norm": 1.788685917854309, "learning_rate": 4.952490655374009e-08, "loss": 0.5602, "step": 13683 }, { "epoch": 0.9633227736712425, "grad_norm": 1.8205682039260864, "learning_rate": 4.933556070071099e-08, "loss": 0.5379, "step": 13684 }, { "epoch": 0.9633931714185147, "grad_norm": 1.8902606964111328, "learning_rate": 4.914657621901952e-08, "loss": 0.6016, "step": 13685 }, { "epoch": 0.9634635691657867, "grad_norm": 2.2452504634857178, "learning_rate": 4.89579531184936e-08, "loss": 0.7491, "step": 13686 }, { "epoch": 0.9635339669130588, "grad_norm": 1.9576537609100342, "learning_rate": 4.876969140893861e-08, "loss": 0.6756, "step": 13687 }, { "epoch": 0.9636043646603308, "grad_norm": 1.6032516956329346, "learning_rate": 4.85817911001436e-08, "loss": 0.7054, "step": 13688 }, { "epoch": 0.963674762407603, "grad_norm": 1.879217505455017, "learning_rate": 4.8394252201875874e-08, "loss": 0.6189, "step": 13689 }, { "epoch": 0.963745160154875, "grad_norm": 1.9222198724746704, "learning_rate": 4.8207074723886414e-08, "loss": 0.6918, "step": 13690 }, { "epoch": 0.9638155579021471, "grad_norm": 2.85994553565979, "learning_rate": 4.8020258675907535e-08, "loss": 0.6966, "step": 13691 }, { "epoch": 0.9638859556494193, "grad_norm": 1.9601545333862305, "learning_rate": 4.7833804067651364e-08, "loss": 0.5764, "step": 13692 }, { "epoch": 0.9639563533966913, "grad_norm": 3.5736024379730225, "learning_rate": 4.764771090881292e-08, "loss": 0.6153, "step": 13693 }, { "epoch": 0.9640267511439634, "grad_norm": 1.934577226638794, "learning_rate": 4.7461979209065474e-08, "loss": 0.6435, "step": 13694 }, { "epoch": 0.9640971488912354, "grad_norm": 2.4388949871063232, "learning_rate": 4.727660897806596e-08, "loss": 0.6673, "step": 13695 }, { "epoch": 0.9641675466385076, "grad_norm": 1.8684061765670776, "learning_rate": 4.7091600225453443e-08, "loss": 0.5635, "step": 13696 }, { "epoch": 0.9642379443857797, "grad_norm": 2.0020551681518555, "learning_rate": 4.690695296084446e-08, "loss": 0.6221, "step": 13697 }, { "epoch": 0.9643083421330517, "grad_norm": 2.44270920753479, "learning_rate": 4.6722667193840774e-08, "loss": 0.5463, "step": 13698 }, { "epoch": 0.9643787398803239, "grad_norm": 2.484109401702881, "learning_rate": 4.653874293402238e-08, "loss": 0.5261, "step": 13699 }, { "epoch": 0.9644491376275959, "grad_norm": 1.8915208578109741, "learning_rate": 4.635518019095297e-08, "loss": 0.6331, "step": 13700 }, { "epoch": 0.964519535374868, "grad_norm": 1.8285003900527954, "learning_rate": 4.617197897417446e-08, "loss": 0.5944, "step": 13701 }, { "epoch": 0.9645899331221401, "grad_norm": 2.1266233921051025, "learning_rate": 4.598913929321324e-08, "loss": 0.6461, "step": 13702 }, { "epoch": 0.9646603308694122, "grad_norm": 2.3052542209625244, "learning_rate": 4.5806661157573925e-08, "loss": 0.6764, "step": 13703 }, { "epoch": 0.9647307286166843, "grad_norm": 2.432960033416748, "learning_rate": 4.562454457674481e-08, "loss": 0.6349, "step": 13704 }, { "epoch": 0.9648011263639563, "grad_norm": 1.808173418045044, "learning_rate": 4.544278956019398e-08, "loss": 0.5895, "step": 13705 }, { "epoch": 0.9648715241112285, "grad_norm": 1.897590160369873, "learning_rate": 4.5261396117370124e-08, "loss": 0.565, "step": 13706 }, { "epoch": 0.9649419218585005, "grad_norm": 1.9976789951324463, "learning_rate": 4.508036425770556e-08, "loss": 0.5121, "step": 13707 }, { "epoch": 0.9650123196057726, "grad_norm": 1.755511999130249, "learning_rate": 4.4899693990611666e-08, "loss": 0.5495, "step": 13708 }, { "epoch": 0.9650827173530447, "grad_norm": 1.722562313079834, "learning_rate": 4.4719385325481144e-08, "loss": 0.6023, "step": 13709 }, { "epoch": 0.9651531151003168, "grad_norm": 2.240427255630493, "learning_rate": 4.45394382716896e-08, "loss": 0.6186, "step": 13710 }, { "epoch": 0.9652235128475889, "grad_norm": 1.8379778861999512, "learning_rate": 4.4359852838591676e-08, "loss": 0.5972, "step": 13711 }, { "epoch": 0.9652939105948609, "grad_norm": 2.2406554222106934, "learning_rate": 4.418062903552411e-08, "loss": 0.7509, "step": 13712 }, { "epoch": 0.965364308342133, "grad_norm": 1.744474172592163, "learning_rate": 4.400176687180501e-08, "loss": 0.5903, "step": 13713 }, { "epoch": 0.9654347060894052, "grad_norm": 2.278252363204956, "learning_rate": 4.3823266356733835e-08, "loss": 0.6373, "step": 13714 }, { "epoch": 0.9655051038366772, "grad_norm": 2.012127637863159, "learning_rate": 4.364512749959137e-08, "loss": 0.6659, "step": 13715 }, { "epoch": 0.9655755015839493, "grad_norm": 2.0083298683166504, "learning_rate": 4.346735030963822e-08, "loss": 0.7038, "step": 13716 }, { "epoch": 0.9656458993312214, "grad_norm": 2.1117491722106934, "learning_rate": 4.328993479611864e-08, "loss": 0.5787, "step": 13717 }, { "epoch": 0.9657162970784935, "grad_norm": 1.7972369194030762, "learning_rate": 4.3112880968254385e-08, "loss": 0.6348, "step": 13718 }, { "epoch": 0.9657866948257656, "grad_norm": 2.3543920516967773, "learning_rate": 4.293618883525318e-08, "loss": 0.5828, "step": 13719 }, { "epoch": 0.9658570925730376, "grad_norm": 2.033046007156372, "learning_rate": 4.275985840629948e-08, "loss": 0.5921, "step": 13720 }, { "epoch": 0.9659274903203098, "grad_norm": 2.2680375576019287, "learning_rate": 4.2583889690562146e-08, "loss": 0.5027, "step": 13721 }, { "epoch": 0.9659978880675818, "grad_norm": 2.0761473178863525, "learning_rate": 4.24082826971891e-08, "loss": 0.6492, "step": 13722 }, { "epoch": 0.9660682858148539, "grad_norm": 1.8724055290222168, "learning_rate": 4.223303743531037e-08, "loss": 0.644, "step": 13723 }, { "epoch": 0.9661386835621261, "grad_norm": 1.912227749824524, "learning_rate": 4.2058153914037334e-08, "loss": 0.6217, "step": 13724 }, { "epoch": 0.9662090813093981, "grad_norm": 2.0713272094726562, "learning_rate": 4.188363214246271e-08, "loss": 0.7078, "step": 13725 }, { "epoch": 0.9662794790566702, "grad_norm": 1.892340898513794, "learning_rate": 4.1709472129659805e-08, "loss": 0.6538, "step": 13726 }, { "epoch": 0.9663498768039422, "grad_norm": 2.1259801387786865, "learning_rate": 4.1535673884683264e-08, "loss": 0.6784, "step": 13727 }, { "epoch": 0.9664202745512144, "grad_norm": 2.2566416263580322, "learning_rate": 4.1362237416568305e-08, "loss": 0.6666, "step": 13728 }, { "epoch": 0.9664906722984864, "grad_norm": 2.335249900817871, "learning_rate": 4.1189162734333815e-08, "loss": 0.6866, "step": 13729 }, { "epoch": 0.9665610700457585, "grad_norm": 1.7556648254394531, "learning_rate": 4.1016449846976165e-08, "loss": 0.6344, "step": 13730 }, { "epoch": 0.9666314677930307, "grad_norm": 2.0587639808654785, "learning_rate": 4.084409876347539e-08, "loss": 0.5968, "step": 13731 }, { "epoch": 0.9667018655403027, "grad_norm": 1.611339807510376, "learning_rate": 4.0672109492792877e-08, "loss": 0.6389, "step": 13732 }, { "epoch": 0.9667722632875748, "grad_norm": 2.242575168609619, "learning_rate": 4.0500482043870576e-08, "loss": 0.6742, "step": 13733 }, { "epoch": 0.9668426610348468, "grad_norm": 1.7965474128723145, "learning_rate": 4.032921642563103e-08, "loss": 0.7114, "step": 13734 }, { "epoch": 0.966913058782119, "grad_norm": 1.9060027599334717, "learning_rate": 4.015831264697889e-08, "loss": 0.7644, "step": 13735 }, { "epoch": 0.9669834565293911, "grad_norm": 1.8464876413345337, "learning_rate": 3.99877707167986e-08, "loss": 0.6625, "step": 13736 }, { "epoch": 0.9670538542766631, "grad_norm": 2.0757455825805664, "learning_rate": 3.9817590643957514e-08, "loss": 0.5526, "step": 13737 }, { "epoch": 0.9671242520239353, "grad_norm": 2.465709924697876, "learning_rate": 3.9647772437303555e-08, "loss": 0.6474, "step": 13738 }, { "epoch": 0.9671946497712073, "grad_norm": 1.9949442148208618, "learning_rate": 3.947831610566521e-08, "loss": 0.7046, "step": 13739 }, { "epoch": 0.9672650475184794, "grad_norm": 2.1636664867401123, "learning_rate": 3.930922165785311e-08, "loss": 0.6579, "step": 13740 }, { "epoch": 0.9673354452657515, "grad_norm": 2.1165449619293213, "learning_rate": 3.9140489102659214e-08, "loss": 0.6063, "step": 13741 }, { "epoch": 0.9674058430130236, "grad_norm": 2.056638717651367, "learning_rate": 3.8972118448855283e-08, "loss": 0.5656, "step": 13742 }, { "epoch": 0.9674762407602957, "grad_norm": 2.017348289489746, "learning_rate": 3.8804109705194434e-08, "loss": 0.5285, "step": 13743 }, { "epoch": 0.9675466385075677, "grad_norm": 1.928014874458313, "learning_rate": 3.863646288041267e-08, "loss": 0.562, "step": 13744 }, { "epoch": 0.9676170362548399, "grad_norm": 2.2839653491973877, "learning_rate": 3.846917798322658e-08, "loss": 0.6967, "step": 13745 }, { "epoch": 0.9676874340021119, "grad_norm": 2.198289394378662, "learning_rate": 3.830225502233175e-08, "loss": 0.6358, "step": 13746 }, { "epoch": 0.967757831749384, "grad_norm": 1.8184248208999634, "learning_rate": 3.813569400640826e-08, "loss": 0.5485, "step": 13747 }, { "epoch": 0.9678282294966561, "grad_norm": 2.0214271545410156, "learning_rate": 3.796949494411439e-08, "loss": 0.6928, "step": 13748 }, { "epoch": 0.9678986272439282, "grad_norm": 1.9419320821762085, "learning_rate": 3.780365784409212e-08, "loss": 0.7494, "step": 13749 }, { "epoch": 0.9679690249912003, "grad_norm": 2.0093870162963867, "learning_rate": 3.763818271496244e-08, "loss": 0.6123, "step": 13750 }, { "epoch": 0.9680394227384723, "grad_norm": 3.599543809890747, "learning_rate": 3.747306956533003e-08, "loss": 0.6356, "step": 13751 }, { "epoch": 0.9681098204857445, "grad_norm": 1.6152442693710327, "learning_rate": 3.730831840377702e-08, "loss": 0.6024, "step": 13752 }, { "epoch": 0.9681802182330166, "grad_norm": 2.727076292037964, "learning_rate": 3.7143929238871555e-08, "loss": 0.7176, "step": 13753 }, { "epoch": 0.9682506159802886, "grad_norm": 1.813298225402832, "learning_rate": 3.6979902079159245e-08, "loss": 0.6077, "step": 13754 }, { "epoch": 0.9683210137275607, "grad_norm": 2.0602526664733887, "learning_rate": 3.6816236933167045e-08, "loss": 0.7281, "step": 13755 }, { "epoch": 0.9683914114748328, "grad_norm": 2.108492374420166, "learning_rate": 3.66529338094056e-08, "loss": 0.6805, "step": 13756 }, { "epoch": 0.9684618092221049, "grad_norm": 2.101062059402466, "learning_rate": 3.648999271636377e-08, "loss": 0.6342, "step": 13757 }, { "epoch": 0.968532206969377, "grad_norm": 2.110302686691284, "learning_rate": 3.6327413662514126e-08, "loss": 0.5507, "step": 13758 }, { "epoch": 0.968602604716649, "grad_norm": 2.1095926761627197, "learning_rate": 3.6165196656309016e-08, "loss": 0.6567, "step": 13759 }, { "epoch": 0.9686730024639212, "grad_norm": 2.2759459018707275, "learning_rate": 3.6003341706182136e-08, "loss": 0.6682, "step": 13760 }, { "epoch": 0.9687434002111932, "grad_norm": 2.211298704147339, "learning_rate": 3.5841848820548525e-08, "loss": 0.6534, "step": 13761 }, { "epoch": 0.9688137979584653, "grad_norm": 1.9167650938034058, "learning_rate": 3.568071800780459e-08, "loss": 0.7187, "step": 13762 }, { "epoch": 0.9688841957057374, "grad_norm": 2.0093300342559814, "learning_rate": 3.551994927632651e-08, "loss": 0.6064, "step": 13763 }, { "epoch": 0.9689545934530095, "grad_norm": 2.1279139518737793, "learning_rate": 3.535954263447416e-08, "loss": 0.6517, "step": 13764 }, { "epoch": 0.9690249912002816, "grad_norm": 1.8638882637023926, "learning_rate": 3.51994980905872e-08, "loss": 0.7537, "step": 13765 }, { "epoch": 0.9690953889475536, "grad_norm": 2.17608380317688, "learning_rate": 3.5039815652985864e-08, "loss": 0.5723, "step": 13766 }, { "epoch": 0.9691657866948258, "grad_norm": 1.6910866498947144, "learning_rate": 3.4880495329971724e-08, "loss": 0.7545, "step": 13767 }, { "epoch": 0.9692361844420978, "grad_norm": 2.3649721145629883, "learning_rate": 3.4721537129829285e-08, "loss": 0.6347, "step": 13768 }, { "epoch": 0.9693065821893699, "grad_norm": 2.007607936859131, "learning_rate": 3.456294106082125e-08, "loss": 0.5703, "step": 13769 }, { "epoch": 0.9693769799366421, "grad_norm": 2.212717294692993, "learning_rate": 3.440470713119559e-08, "loss": 0.6582, "step": 13770 }, { "epoch": 0.9694473776839141, "grad_norm": 2.257544994354248, "learning_rate": 3.424683534917694e-08, "loss": 0.7785, "step": 13771 }, { "epoch": 0.9695177754311862, "grad_norm": 2.1596286296844482, "learning_rate": 3.4089325722973626e-08, "loss": 0.6152, "step": 13772 }, { "epoch": 0.9695881731784582, "grad_norm": 1.7524927854537964, "learning_rate": 3.3932178260775325e-08, "loss": 0.6171, "step": 13773 }, { "epoch": 0.9696585709257304, "grad_norm": 2.126952886581421, "learning_rate": 3.3775392970751495e-08, "loss": 0.6621, "step": 13774 }, { "epoch": 0.9697289686730025, "grad_norm": 3.2249696254730225, "learning_rate": 3.3618969861054504e-08, "loss": 0.6192, "step": 13775 }, { "epoch": 0.9697993664202745, "grad_norm": 1.8159661293029785, "learning_rate": 3.3462908939815736e-08, "loss": 0.6157, "step": 13776 }, { "epoch": 0.9698697641675467, "grad_norm": 2.0638270378112793, "learning_rate": 3.330721021515026e-08, "loss": 0.6378, "step": 13777 }, { "epoch": 0.9699401619148187, "grad_norm": 2.250180244445801, "learning_rate": 3.315187369515216e-08, "loss": 0.7498, "step": 13778 }, { "epoch": 0.9700105596620908, "grad_norm": 2.424560070037842, "learning_rate": 3.2996899387897625e-08, "loss": 0.6402, "step": 13779 }, { "epoch": 0.970080957409363, "grad_norm": 2.0093135833740234, "learning_rate": 3.284228730144423e-08, "loss": 0.6501, "step": 13780 }, { "epoch": 0.970151355156635, "grad_norm": 1.8810350894927979, "learning_rate": 3.268803744383009e-08, "loss": 0.5327, "step": 13781 }, { "epoch": 0.9702217529039071, "grad_norm": 2.0646541118621826, "learning_rate": 3.2534149823074676e-08, "loss": 0.6755, "step": 13782 }, { "epoch": 0.9702921506511791, "grad_norm": 2.23921799659729, "learning_rate": 3.238062444717882e-08, "loss": 0.6152, "step": 13783 }, { "epoch": 0.9703625483984513, "grad_norm": 2.328871965408325, "learning_rate": 3.222746132412468e-08, "loss": 0.8001, "step": 13784 }, { "epoch": 0.9704329461457233, "grad_norm": 3.347743511199951, "learning_rate": 3.2074660461875014e-08, "loss": 0.6823, "step": 13785 }, { "epoch": 0.9705033438929954, "grad_norm": 2.5995590686798096, "learning_rate": 3.192222186837468e-08, "loss": 0.5941, "step": 13786 }, { "epoch": 0.9705737416402676, "grad_norm": 2.1383590698242188, "learning_rate": 3.177014555154833e-08, "loss": 0.675, "step": 13787 }, { "epoch": 0.9706441393875396, "grad_norm": 2.100113868713379, "learning_rate": 3.161843151930277e-08, "loss": 0.6546, "step": 13788 }, { "epoch": 0.9707145371348117, "grad_norm": 1.831207036972046, "learning_rate": 3.146707977952612e-08, "loss": 0.6857, "step": 13789 }, { "epoch": 0.9707849348820837, "grad_norm": 1.7736589908599854, "learning_rate": 3.131609034008787e-08, "loss": 0.5573, "step": 13790 }, { "epoch": 0.9708553326293559, "grad_norm": 1.8500123023986816, "learning_rate": 3.116546320883573e-08, "loss": 0.7001, "step": 13791 }, { "epoch": 0.970925730376628, "grad_norm": 1.9352549314498901, "learning_rate": 3.1015198393603446e-08, "loss": 0.5978, "step": 13792 }, { "epoch": 0.9709961281239, "grad_norm": 1.9136306047439575, "learning_rate": 3.086529590220222e-08, "loss": 0.582, "step": 13793 }, { "epoch": 0.9710665258711721, "grad_norm": 2.065199375152588, "learning_rate": 3.071575574242613e-08, "loss": 0.6702, "step": 13794 }, { "epoch": 0.9711369236184442, "grad_norm": 3.1111533641815186, "learning_rate": 3.0566577922049864e-08, "loss": 0.6772, "step": 13795 }, { "epoch": 0.9712073213657163, "grad_norm": 1.74871027469635, "learning_rate": 3.0417762448828655e-08, "loss": 0.599, "step": 13796 }, { "epoch": 0.9712777191129884, "grad_norm": 2.356180191040039, "learning_rate": 3.026930933050065e-08, "loss": 0.638, "step": 13797 }, { "epoch": 0.9713481168602605, "grad_norm": 2.254185199737549, "learning_rate": 3.0121218574783004e-08, "loss": 0.8017, "step": 13798 }, { "epoch": 0.9714185146075326, "grad_norm": 2.1784896850585938, "learning_rate": 2.9973490189375005e-08, "loss": 0.5563, "step": 13799 }, { "epoch": 0.9714889123548046, "grad_norm": 2.4291844367980957, "learning_rate": 2.982612418195807e-08, "loss": 0.6661, "step": 13800 }, { "epoch": 0.9715593101020767, "grad_norm": 2.2967331409454346, "learning_rate": 2.9679120560193395e-08, "loss": 0.7242, "step": 13801 }, { "epoch": 0.9716297078493488, "grad_norm": 1.7246410846710205, "learning_rate": 2.9532479331724314e-08, "loss": 0.4971, "step": 13802 }, { "epoch": 0.9717001055966209, "grad_norm": 2.4033584594726562, "learning_rate": 2.9386200504173952e-08, "loss": 0.6688, "step": 13803 }, { "epoch": 0.971770503343893, "grad_norm": 2.190147638320923, "learning_rate": 2.9240284085148338e-08, "loss": 0.5297, "step": 13804 }, { "epoch": 0.971840901091165, "grad_norm": 2.003810405731201, "learning_rate": 2.9094730082233287e-08, "loss": 0.6186, "step": 13805 }, { "epoch": 0.9719112988384372, "grad_norm": 2.5083229541778564, "learning_rate": 2.894953850299675e-08, "loss": 0.6584, "step": 13806 }, { "epoch": 0.9719816965857092, "grad_norm": 2.0494346618652344, "learning_rate": 2.8804709354986468e-08, "loss": 0.5935, "step": 13807 }, { "epoch": 0.9720520943329813, "grad_norm": 1.530909538269043, "learning_rate": 2.866024264573308e-08, "loss": 0.6716, "step": 13808 }, { "epoch": 0.9721224920802535, "grad_norm": 2.096888542175293, "learning_rate": 2.85161383827478e-08, "loss": 0.6397, "step": 13809 }, { "epoch": 0.9721928898275255, "grad_norm": 2.1749026775360107, "learning_rate": 2.837239657352164e-08, "loss": 0.6469, "step": 13810 }, { "epoch": 0.9722632875747976, "grad_norm": 2.020385265350342, "learning_rate": 2.8229017225528507e-08, "loss": 0.5683, "step": 13811 }, { "epoch": 0.9723336853220697, "grad_norm": 2.1140897274017334, "learning_rate": 2.8086000346222882e-08, "loss": 0.6913, "step": 13812 }, { "epoch": 0.9724040830693418, "grad_norm": 1.8613414764404297, "learning_rate": 2.7943345943040596e-08, "loss": 0.5747, "step": 13813 }, { "epoch": 0.9724744808166139, "grad_norm": 2.575803279876709, "learning_rate": 2.7801054023397276e-08, "loss": 0.5775, "step": 13814 }, { "epoch": 0.9725448785638859, "grad_norm": 1.8442661762237549, "learning_rate": 2.765912459469222e-08, "loss": 0.6239, "step": 13815 }, { "epoch": 0.9726152763111581, "grad_norm": 1.7612059116363525, "learning_rate": 2.7517557664302973e-08, "loss": 0.6679, "step": 13816 }, { "epoch": 0.9726856740584301, "grad_norm": 2.4677224159240723, "learning_rate": 2.7376353239591534e-08, "loss": 0.5866, "step": 13817 }, { "epoch": 0.9727560718057022, "grad_norm": 2.812812089920044, "learning_rate": 2.7235511327898143e-08, "loss": 0.6094, "step": 13818 }, { "epoch": 0.9728264695529744, "grad_norm": 2.116992712020874, "learning_rate": 2.7095031936545164e-08, "loss": 0.5641, "step": 13819 }, { "epoch": 0.9728968673002464, "grad_norm": 2.0512852668762207, "learning_rate": 2.6954915072836315e-08, "loss": 0.6909, "step": 13820 }, { "epoch": 0.9729672650475185, "grad_norm": 1.9277839660644531, "learning_rate": 2.6815160744056654e-08, "loss": 0.6588, "step": 13821 }, { "epoch": 0.9730376627947905, "grad_norm": 1.9388375282287598, "learning_rate": 2.6675768957471812e-08, "loss": 0.5145, "step": 13822 }, { "epoch": 0.9731080605420627, "grad_norm": 2.120755434036255, "learning_rate": 2.6536739720329548e-08, "loss": 0.6715, "step": 13823 }, { "epoch": 0.9731784582893347, "grad_norm": 1.7773373126983643, "learning_rate": 2.6398073039858193e-08, "loss": 0.7001, "step": 13824 }, { "epoch": 0.9732488560366068, "grad_norm": 1.9858182668685913, "learning_rate": 2.6259768923266647e-08, "loss": 0.6148, "step": 13825 }, { "epoch": 0.973319253783879, "grad_norm": 1.94850492477417, "learning_rate": 2.6121827377744377e-08, "loss": 0.7132, "step": 13826 }, { "epoch": 0.973389651531151, "grad_norm": 1.8273011445999146, "learning_rate": 2.5984248410465316e-08, "loss": 0.5647, "step": 13827 }, { "epoch": 0.9734600492784231, "grad_norm": 2.043755292892456, "learning_rate": 2.584703202858085e-08, "loss": 0.7167, "step": 13828 }, { "epoch": 0.9735304470256951, "grad_norm": 2.362682819366455, "learning_rate": 2.571017823922528e-08, "loss": 0.6403, "step": 13829 }, { "epoch": 0.9736008447729673, "grad_norm": 1.7078474760055542, "learning_rate": 2.557368704951346e-08, "loss": 0.6568, "step": 13830 }, { "epoch": 0.9736712425202394, "grad_norm": 1.7653274536132812, "learning_rate": 2.5437558466543164e-08, "loss": 0.57, "step": 13831 }, { "epoch": 0.9737416402675114, "grad_norm": 1.721909523010254, "learning_rate": 2.530179249738962e-08, "loss": 0.6512, "step": 13832 }, { "epoch": 0.9738120380147836, "grad_norm": 1.807871699333191, "learning_rate": 2.5166389149113288e-08, "loss": 0.6403, "step": 13833 }, { "epoch": 0.9738824357620556, "grad_norm": 2.1584830284118652, "learning_rate": 2.50313484287521e-08, "loss": 0.7158, "step": 13834 }, { "epoch": 0.9739528335093277, "grad_norm": 1.9495420455932617, "learning_rate": 2.4896670343329206e-08, "loss": 0.6859, "step": 13835 }, { "epoch": 0.9740232312565998, "grad_norm": 1.9879692792892456, "learning_rate": 2.476235489984524e-08, "loss": 0.6168, "step": 13836 }, { "epoch": 0.9740936290038719, "grad_norm": 1.999027967453003, "learning_rate": 2.4628402105282943e-08, "loss": 0.571, "step": 13837 }, { "epoch": 0.974164026751144, "grad_norm": 1.825156807899475, "learning_rate": 2.4494811966607964e-08, "loss": 0.5776, "step": 13838 }, { "epoch": 0.974234424498416, "grad_norm": 2.056699514389038, "learning_rate": 2.4361584490764977e-08, "loss": 0.6569, "step": 13839 }, { "epoch": 0.9743048222456882, "grad_norm": 1.8539211750030518, "learning_rate": 2.422871968467999e-08, "loss": 0.6684, "step": 13840 }, { "epoch": 0.9743752199929602, "grad_norm": 1.9481741189956665, "learning_rate": 2.40962175552627e-08, "loss": 0.5466, "step": 13841 }, { "epoch": 0.9744456177402323, "grad_norm": 2.0550355911254883, "learning_rate": 2.3964078109400265e-08, "loss": 0.5996, "step": 13842 }, { "epoch": 0.9745160154875044, "grad_norm": 2.2819433212280273, "learning_rate": 2.3832301353963526e-08, "loss": 0.6794, "step": 13843 }, { "epoch": 0.9745864132347765, "grad_norm": 1.8703759908676147, "learning_rate": 2.370088729580233e-08, "loss": 0.6743, "step": 13844 }, { "epoch": 0.9746568109820486, "grad_norm": 1.8964557647705078, "learning_rate": 2.3569835941750993e-08, "loss": 0.6691, "step": 13845 }, { "epoch": 0.9747272087293206, "grad_norm": 2.0510387420654297, "learning_rate": 2.3439147298622064e-08, "loss": 0.6789, "step": 13846 }, { "epoch": 0.9747976064765927, "grad_norm": 1.7790250778198242, "learning_rate": 2.330882137321022e-08, "loss": 0.6605, "step": 13847 }, { "epoch": 0.9748680042238649, "grad_norm": 1.8985987901687622, "learning_rate": 2.3178858172291484e-08, "loss": 0.7453, "step": 13848 }, { "epoch": 0.9749384019711369, "grad_norm": 1.9306399822235107, "learning_rate": 2.3049257702622448e-08, "loss": 0.7393, "step": 13849 }, { "epoch": 0.975008799718409, "grad_norm": 1.9223668575286865, "learning_rate": 2.2920019970940287e-08, "loss": 0.6218, "step": 13850 }, { "epoch": 0.9750791974656811, "grad_norm": 2.1543235778808594, "learning_rate": 2.2791144983965837e-08, "loss": 0.5695, "step": 13851 }, { "epoch": 0.9751495952129532, "grad_norm": 2.402139186859131, "learning_rate": 2.266263274839897e-08, "loss": 0.7162, "step": 13852 }, { "epoch": 0.9752199929602253, "grad_norm": 1.901343584060669, "learning_rate": 2.2534483270920114e-08, "loss": 0.669, "step": 13853 }, { "epoch": 0.9752903907074973, "grad_norm": 1.8808670043945312, "learning_rate": 2.2406696558193385e-08, "loss": 0.6065, "step": 13854 }, { "epoch": 0.9753607884547695, "grad_norm": 1.8576053380966187, "learning_rate": 2.2279272616861135e-08, "loss": 0.6347, "step": 13855 }, { "epoch": 0.9754311862020415, "grad_norm": 1.8071579933166504, "learning_rate": 2.2152211453549396e-08, "loss": 0.6097, "step": 13856 }, { "epoch": 0.9755015839493136, "grad_norm": 1.5272597074508667, "learning_rate": 2.2025513074863222e-08, "loss": 0.5678, "step": 13857 }, { "epoch": 0.9755719816965857, "grad_norm": 2.009242296218872, "learning_rate": 2.1899177487390563e-08, "loss": 0.5959, "step": 13858 }, { "epoch": 0.9756423794438578, "grad_norm": 2.187286615371704, "learning_rate": 2.1773204697699167e-08, "loss": 0.6089, "step": 13859 }, { "epoch": 0.9757127771911299, "grad_norm": 1.8573803901672363, "learning_rate": 2.164759471233968e-08, "loss": 0.628, "step": 13860 }, { "epoch": 0.9757831749384019, "grad_norm": 2.5342578887939453, "learning_rate": 2.152234753784099e-08, "loss": 0.682, "step": 13861 }, { "epoch": 0.9758535726856741, "grad_norm": 2.3051044940948486, "learning_rate": 2.139746318071567e-08, "loss": 0.6661, "step": 13862 }, { "epoch": 0.9759239704329461, "grad_norm": 1.8559985160827637, "learning_rate": 2.127294164745608e-08, "loss": 0.6014, "step": 13863 }, { "epoch": 0.9759943681802182, "grad_norm": 2.173276662826538, "learning_rate": 2.1148782944536703e-08, "loss": 0.6273, "step": 13864 }, { "epoch": 0.9760647659274904, "grad_norm": 2.2270429134368896, "learning_rate": 2.1024987078411826e-08, "loss": 0.6448, "step": 13865 }, { "epoch": 0.9761351636747624, "grad_norm": 2.927433490753174, "learning_rate": 2.090155405551941e-08, "loss": 0.6222, "step": 13866 }, { "epoch": 0.9762055614220345, "grad_norm": 2.027301788330078, "learning_rate": 2.0778483882275656e-08, "loss": 0.6716, "step": 13867 }, { "epoch": 0.9762759591693065, "grad_norm": 2.2907028198242188, "learning_rate": 2.0655776565079665e-08, "loss": 0.6039, "step": 13868 }, { "epoch": 0.9763463569165787, "grad_norm": 1.721142053604126, "learning_rate": 2.053343211030956e-08, "loss": 0.5974, "step": 13869 }, { "epoch": 0.9764167546638508, "grad_norm": 1.9070703983306885, "learning_rate": 2.041145052432869e-08, "loss": 0.5604, "step": 13870 }, { "epoch": 0.9764871524111228, "grad_norm": 3.7234890460968018, "learning_rate": 2.0289831813476322e-08, "loss": 0.6713, "step": 13871 }, { "epoch": 0.976557550158395, "grad_norm": 2.377213478088379, "learning_rate": 2.016857598407773e-08, "loss": 0.6441, "step": 13872 }, { "epoch": 0.976627947905667, "grad_norm": 2.015456199645996, "learning_rate": 2.004768304243565e-08, "loss": 0.599, "step": 13873 }, { "epoch": 0.9766983456529391, "grad_norm": 1.7866677045822144, "learning_rate": 1.9927152994836493e-08, "loss": 0.6745, "step": 13874 }, { "epoch": 0.9767687434002112, "grad_norm": 1.762474536895752, "learning_rate": 1.98069858475457e-08, "loss": 0.5882, "step": 13875 }, { "epoch": 0.9768391411474833, "grad_norm": 2.118748903274536, "learning_rate": 1.96871816068116e-08, "loss": 0.6522, "step": 13876 }, { "epoch": 0.9769095388947554, "grad_norm": 1.7152910232543945, "learning_rate": 1.956774027886232e-08, "loss": 0.623, "step": 13877 }, { "epoch": 0.9769799366420274, "grad_norm": 1.8316621780395508, "learning_rate": 1.9448661869908123e-08, "loss": 0.6189, "step": 13878 }, { "epoch": 0.9770503343892996, "grad_norm": 1.845401406288147, "learning_rate": 1.9329946386140607e-08, "loss": 0.6614, "step": 13879 }, { "epoch": 0.9771207321365716, "grad_norm": 1.9917969703674316, "learning_rate": 1.921159383373039e-08, "loss": 0.6096, "step": 13880 }, { "epoch": 0.9771911298838437, "grad_norm": 1.9638712406158447, "learning_rate": 1.909360421883177e-08, "loss": 0.5694, "step": 13881 }, { "epoch": 0.9772615276311158, "grad_norm": 2.659841537475586, "learning_rate": 1.8975977547579626e-08, "loss": 0.5996, "step": 13882 }, { "epoch": 0.9773319253783879, "grad_norm": 2.295395612716675, "learning_rate": 1.8858713826087836e-08, "loss": 0.6725, "step": 13883 }, { "epoch": 0.97740232312566, "grad_norm": 1.779305100440979, "learning_rate": 1.8741813060454746e-08, "loss": 0.605, "step": 13884 }, { "epoch": 0.977472720872932, "grad_norm": 1.9546655416488647, "learning_rate": 1.8625275256756946e-08, "loss": 0.5521, "step": 13885 }, { "epoch": 0.9775431186202042, "grad_norm": 2.355107545852661, "learning_rate": 1.8509100421053913e-08, "loss": 0.6819, "step": 13886 }, { "epoch": 0.9776135163674763, "grad_norm": 1.9291077852249146, "learning_rate": 1.839328855938571e-08, "loss": 0.611, "step": 13887 }, { "epoch": 0.9776839141147483, "grad_norm": 1.707094430923462, "learning_rate": 1.8277839677773743e-08, "loss": 0.6856, "step": 13888 }, { "epoch": 0.9777543118620204, "grad_norm": 3.1466445922851562, "learning_rate": 1.816275378221921e-08, "loss": 0.6719, "step": 13889 }, { "epoch": 0.9778247096092925, "grad_norm": 2.2554380893707275, "learning_rate": 1.8048030878706213e-08, "loss": 0.6391, "step": 13890 }, { "epoch": 0.9778951073565646, "grad_norm": 1.7734118700027466, "learning_rate": 1.7933670973199422e-08, "loss": 0.6176, "step": 13891 }, { "epoch": 0.9779655051038367, "grad_norm": 2.480674982070923, "learning_rate": 1.7819674071644864e-08, "loss": 0.7438, "step": 13892 }, { "epoch": 0.9780359028511088, "grad_norm": 2.0716285705566406, "learning_rate": 1.7706040179968352e-08, "loss": 0.5728, "step": 13893 }, { "epoch": 0.9781063005983809, "grad_norm": 2.003028154373169, "learning_rate": 1.7592769304078604e-08, "loss": 0.5889, "step": 13894 }, { "epoch": 0.9781766983456529, "grad_norm": 2.124868392944336, "learning_rate": 1.7479861449864132e-08, "loss": 0.6142, "step": 13895 }, { "epoch": 0.978247096092925, "grad_norm": 2.04422926902771, "learning_rate": 1.736731662319635e-08, "loss": 0.5897, "step": 13896 }, { "epoch": 0.9783174938401971, "grad_norm": 2.4537200927734375, "learning_rate": 1.725513482992491e-08, "loss": 0.6467, "step": 13897 }, { "epoch": 0.9783878915874692, "grad_norm": 1.790695071220398, "learning_rate": 1.7143316075883152e-08, "loss": 0.6422, "step": 13898 }, { "epoch": 0.9784582893347413, "grad_norm": 2.1047520637512207, "learning_rate": 1.7031860366883422e-08, "loss": 0.732, "step": 13899 }, { "epoch": 0.9785286870820133, "grad_norm": 1.7866829633712769, "learning_rate": 1.6920767708722527e-08, "loss": 0.5976, "step": 13900 }, { "epoch": 0.9785990848292855, "grad_norm": 1.88518488407135, "learning_rate": 1.681003810717474e-08, "loss": 0.5731, "step": 13901 }, { "epoch": 0.9786694825765575, "grad_norm": 1.81674325466156, "learning_rate": 1.669967156799801e-08, "loss": 0.6891, "step": 13902 }, { "epoch": 0.9787398803238296, "grad_norm": 1.8479012250900269, "learning_rate": 1.65896680969293e-08, "loss": 0.6021, "step": 13903 }, { "epoch": 0.9788102780711018, "grad_norm": 2.177363395690918, "learning_rate": 1.648002769968848e-08, "loss": 0.7644, "step": 13904 }, { "epoch": 0.9788806758183738, "grad_norm": 2.0527548789978027, "learning_rate": 1.637075038197522e-08, "loss": 0.7305, "step": 13905 }, { "epoch": 0.9789510735656459, "grad_norm": 2.0223965644836426, "learning_rate": 1.6261836149472074e-08, "loss": 0.683, "step": 13906 }, { "epoch": 0.979021471312918, "grad_norm": 2.2245566844940186, "learning_rate": 1.6153285007840634e-08, "loss": 0.6173, "step": 13907 }, { "epoch": 0.9790918690601901, "grad_norm": 2.1556358337402344, "learning_rate": 1.60450969627246e-08, "loss": 0.6861, "step": 13908 }, { "epoch": 0.9791622668074622, "grad_norm": 1.4890003204345703, "learning_rate": 1.593727201974904e-08, "loss": 0.442, "step": 13909 }, { "epoch": 0.9792326645547342, "grad_norm": 1.9451042413711548, "learning_rate": 1.5829810184520343e-08, "loss": 0.5701, "step": 13910 }, { "epoch": 0.9793030623020064, "grad_norm": 2.187666893005371, "learning_rate": 1.5722711462624718e-08, "loss": 0.5516, "step": 13911 }, { "epoch": 0.9793734600492784, "grad_norm": 1.9932849407196045, "learning_rate": 1.5615975859630482e-08, "loss": 0.7308, "step": 13912 }, { "epoch": 0.9794438577965505, "grad_norm": 2.0667884349823, "learning_rate": 1.550960338108731e-08, "loss": 0.7485, "step": 13913 }, { "epoch": 0.9795142555438225, "grad_norm": 2.0107059478759766, "learning_rate": 1.5403594032525446e-08, "loss": 0.5765, "step": 13914 }, { "epoch": 0.9795846532910947, "grad_norm": 1.7248635292053223, "learning_rate": 1.529794781945648e-08, "loss": 0.5836, "step": 13915 }, { "epoch": 0.9796550510383668, "grad_norm": 1.7558048963546753, "learning_rate": 1.5192664747372574e-08, "loss": 0.567, "step": 13916 }, { "epoch": 0.9797254487856388, "grad_norm": 1.8172531127929688, "learning_rate": 1.5087744821748018e-08, "loss": 0.6093, "step": 13917 }, { "epoch": 0.979795846532911, "grad_norm": 1.8133544921875, "learning_rate": 1.498318804803689e-08, "loss": 0.8076, "step": 13918 }, { "epoch": 0.979866244280183, "grad_norm": 2.195199489593506, "learning_rate": 1.487899443167695e-08, "loss": 0.5849, "step": 13919 }, { "epoch": 0.9799366420274551, "grad_norm": 1.9511091709136963, "learning_rate": 1.4775163978083427e-08, "loss": 0.5935, "step": 13920 }, { "epoch": 0.9800070397747273, "grad_norm": 1.900455355644226, "learning_rate": 1.4671696692656e-08, "loss": 0.6581, "step": 13921 }, { "epoch": 0.9800774375219993, "grad_norm": 1.9135953187942505, "learning_rate": 1.4568592580772587e-08, "loss": 0.6016, "step": 13922 }, { "epoch": 0.9801478352692714, "grad_norm": 2.168309450149536, "learning_rate": 1.4465851647794791e-08, "loss": 0.5908, "step": 13923 }, { "epoch": 0.9802182330165434, "grad_norm": 2.160351037979126, "learning_rate": 1.4363473899064005e-08, "loss": 0.6346, "step": 13924 }, { "epoch": 0.9802886307638156, "grad_norm": 2.0873334407806396, "learning_rate": 1.4261459339902194e-08, "loss": 0.691, "step": 13925 }, { "epoch": 0.9803590285110877, "grad_norm": 2.1926686763763428, "learning_rate": 1.4159807975614225e-08, "loss": 0.6193, "step": 13926 }, { "epoch": 0.9804294262583597, "grad_norm": 2.1842873096466064, "learning_rate": 1.405851981148476e-08, "loss": 0.5953, "step": 13927 }, { "epoch": 0.9804998240056318, "grad_norm": 2.3529088497161865, "learning_rate": 1.3957594852779031e-08, "loss": 0.635, "step": 13928 }, { "epoch": 0.9805702217529039, "grad_norm": 2.137972593307495, "learning_rate": 1.3857033104745175e-08, "loss": 0.5746, "step": 13929 }, { "epoch": 0.980640619500176, "grad_norm": 2.4479687213897705, "learning_rate": 1.375683457261112e-08, "loss": 0.5976, "step": 13930 }, { "epoch": 0.9807110172474481, "grad_norm": 1.9709559679031372, "learning_rate": 1.3656999261586921e-08, "loss": 0.6576, "step": 13931 }, { "epoch": 0.9807814149947202, "grad_norm": 2.2264909744262695, "learning_rate": 1.3557527176862428e-08, "loss": 0.7995, "step": 13932 }, { "epoch": 0.9808518127419923, "grad_norm": 1.7327836751937866, "learning_rate": 1.3458418323608833e-08, "loss": 0.6612, "step": 13933 }, { "epoch": 0.9809222104892643, "grad_norm": 1.8441612720489502, "learning_rate": 1.3359672706979464e-08, "loss": 0.6012, "step": 13934 }, { "epoch": 0.9809926082365364, "grad_norm": 2.039013624191284, "learning_rate": 1.3261290332108988e-08, "loss": 0.6183, "step": 13935 }, { "epoch": 0.9810630059838085, "grad_norm": 2.775846242904663, "learning_rate": 1.3163271204110316e-08, "loss": 0.6341, "step": 13936 }, { "epoch": 0.9811334037310806, "grad_norm": 2.336066246032715, "learning_rate": 1.3065615328082369e-08, "loss": 0.634, "step": 13937 }, { "epoch": 0.9812038014783527, "grad_norm": 1.8354887962341309, "learning_rate": 1.2968322709099977e-08, "loss": 0.6416, "step": 13938 }, { "epoch": 0.9812741992256248, "grad_norm": 1.8197954893112183, "learning_rate": 1.2871393352222426e-08, "loss": 0.6178, "step": 13939 }, { "epoch": 0.9813445969728969, "grad_norm": 1.792296051979065, "learning_rate": 1.2774827262488797e-08, "loss": 0.7439, "step": 13940 }, { "epoch": 0.9814149947201689, "grad_norm": 2.19572377204895, "learning_rate": 1.2678624444920295e-08, "loss": 0.5924, "step": 13941 }, { "epoch": 0.981485392467441, "grad_norm": 2.081674814224243, "learning_rate": 1.2582784904517918e-08, "loss": 0.6659, "step": 13942 }, { "epoch": 0.9815557902147132, "grad_norm": 1.905203938484192, "learning_rate": 1.2487308646264794e-08, "loss": 0.6418, "step": 13943 }, { "epoch": 0.9816261879619852, "grad_norm": 2.6321566104888916, "learning_rate": 1.2392195675124618e-08, "loss": 0.6995, "step": 13944 }, { "epoch": 0.9816965857092573, "grad_norm": 2.2972865104675293, "learning_rate": 1.2297445996042433e-08, "loss": 0.5546, "step": 13945 }, { "epoch": 0.9817669834565294, "grad_norm": 1.9354954957962036, "learning_rate": 1.2203059613944633e-08, "loss": 0.6373, "step": 13946 }, { "epoch": 0.9818373812038015, "grad_norm": 2.3683784008026123, "learning_rate": 1.210903653373896e-08, "loss": 0.6019, "step": 13947 }, { "epoch": 0.9819077789510736, "grad_norm": 1.8320515155792236, "learning_rate": 1.2015376760312168e-08, "loss": 0.6912, "step": 13948 }, { "epoch": 0.9819781766983456, "grad_norm": 1.9634754657745361, "learning_rate": 1.1922080298534699e-08, "loss": 0.6567, "step": 13949 }, { "epoch": 0.9820485744456178, "grad_norm": 1.9985042810440063, "learning_rate": 1.1829147153257557e-08, "loss": 0.754, "step": 13950 }, { "epoch": 0.9821189721928898, "grad_norm": 1.9498015642166138, "learning_rate": 1.1736577329311547e-08, "loss": 0.5641, "step": 13951 }, { "epoch": 0.9821893699401619, "grad_norm": 1.918049693107605, "learning_rate": 1.1644370831510376e-08, "loss": 0.6028, "step": 13952 }, { "epoch": 0.982259767687434, "grad_norm": 1.8839645385742188, "learning_rate": 1.1552527664646761e-08, "loss": 0.516, "step": 13953 }, { "epoch": 0.9823301654347061, "grad_norm": 2.1459548473358154, "learning_rate": 1.146104783349633e-08, "loss": 0.5675, "step": 13954 }, { "epoch": 0.9824005631819782, "grad_norm": 2.742687702178955, "learning_rate": 1.1369931342815275e-08, "loss": 0.6558, "step": 13955 }, { "epoch": 0.9824709609292502, "grad_norm": 2.6880836486816406, "learning_rate": 1.1279178197341144e-08, "loss": 0.7264, "step": 13956 }, { "epoch": 0.9825413586765224, "grad_norm": 2.388540029525757, "learning_rate": 1.118878840179205e-08, "loss": 0.7139, "step": 13957 }, { "epoch": 0.9826117564237944, "grad_norm": 2.3610472679138184, "learning_rate": 1.1098761960866677e-08, "loss": 0.6989, "step": 13958 }, { "epoch": 0.9826821541710665, "grad_norm": 1.921624779701233, "learning_rate": 1.1009098879246615e-08, "loss": 0.7972, "step": 13959 }, { "epoch": 0.9827525519183387, "grad_norm": 1.9396510124206543, "learning_rate": 1.0919799161592469e-08, "loss": 0.7281, "step": 13960 }, { "epoch": 0.9828229496656107, "grad_norm": 2.013742685317993, "learning_rate": 1.0830862812548526e-08, "loss": 0.66, "step": 13961 }, { "epoch": 0.9828933474128828, "grad_norm": 1.8520091772079468, "learning_rate": 1.0742289836737307e-08, "loss": 0.5934, "step": 13962 }, { "epoch": 0.9829637451601548, "grad_norm": 2.0910303592681885, "learning_rate": 1.0654080238764241e-08, "loss": 0.7546, "step": 13963 }, { "epoch": 0.983034142907427, "grad_norm": 1.8276128768920898, "learning_rate": 1.0566234023216103e-08, "loss": 0.68, "step": 13964 }, { "epoch": 0.9831045406546991, "grad_norm": 2.4960989952087402, "learning_rate": 1.0478751194658686e-08, "loss": 0.6393, "step": 13965 }, { "epoch": 0.9831749384019711, "grad_norm": 2.4091808795928955, "learning_rate": 1.0391631757641462e-08, "loss": 0.585, "step": 13966 }, { "epoch": 0.9832453361492433, "grad_norm": 1.992873191833496, "learning_rate": 1.0304875716692919e-08, "loss": 0.6613, "step": 13967 }, { "epoch": 0.9833157338965153, "grad_norm": 1.9012705087661743, "learning_rate": 1.0218483076324447e-08, "loss": 0.6813, "step": 13968 }, { "epoch": 0.9833861316437874, "grad_norm": 1.7931522130966187, "learning_rate": 1.0132453841028011e-08, "loss": 0.662, "step": 13969 }, { "epoch": 0.9834565293910594, "grad_norm": 1.7924164533615112, "learning_rate": 1.0046788015274587e-08, "loss": 0.6523, "step": 13970 }, { "epoch": 0.9835269271383316, "grad_norm": 1.8519757986068726, "learning_rate": 9.961485603519615e-09, "loss": 0.5935, "step": 13971 }, { "epoch": 0.9835973248856037, "grad_norm": 2.0876309871673584, "learning_rate": 9.876546610198322e-09, "loss": 0.6158, "step": 13972 }, { "epoch": 0.9836677226328757, "grad_norm": 2.145796298980713, "learning_rate": 9.791971039724955e-09, "loss": 0.5718, "step": 13973 }, { "epoch": 0.9837381203801479, "grad_norm": 2.106675148010254, "learning_rate": 9.70775889649822e-09, "loss": 0.7333, "step": 13974 }, { "epoch": 0.9838085181274199, "grad_norm": 3.06893253326416, "learning_rate": 9.623910184895834e-09, "loss": 0.7402, "step": 13975 }, { "epoch": 0.983878915874692, "grad_norm": 1.6798187494277954, "learning_rate": 9.540424909276868e-09, "loss": 0.7888, "step": 13976 }, { "epoch": 0.9839493136219641, "grad_norm": 2.155367374420166, "learning_rate": 9.457303073982515e-09, "loss": 0.6205, "step": 13977 }, { "epoch": 0.9840197113692362, "grad_norm": 1.775622844696045, "learning_rate": 9.374544683333763e-09, "loss": 0.7154, "step": 13978 }, { "epoch": 0.9840901091165083, "grad_norm": 1.9982688426971436, "learning_rate": 9.292149741633727e-09, "loss": 0.6728, "step": 13979 }, { "epoch": 0.9841605068637803, "grad_norm": 1.8827953338623047, "learning_rate": 9.210118253166088e-09, "loss": 0.5596, "step": 13980 }, { "epoch": 0.9842309046110524, "grad_norm": 1.9417206048965454, "learning_rate": 9.128450222195106e-09, "loss": 0.6872, "step": 13981 }, { "epoch": 0.9843013023583246, "grad_norm": 1.97786545753479, "learning_rate": 9.047145652967936e-09, "loss": 0.6288, "step": 13982 }, { "epoch": 0.9843717001055966, "grad_norm": 1.8112918138504028, "learning_rate": 8.966204549710754e-09, "loss": 0.6174, "step": 13983 }, { "epoch": 0.9844420978528687, "grad_norm": 1.851441502571106, "learning_rate": 8.885626916631862e-09, "loss": 0.6695, "step": 13984 }, { "epoch": 0.9845124956001408, "grad_norm": 2.0595505237579346, "learning_rate": 8.805412757920905e-09, "loss": 0.5973, "step": 13985 }, { "epoch": 0.9845828933474129, "grad_norm": 1.6627434492111206, "learning_rate": 8.725562077748106e-09, "loss": 0.5953, "step": 13986 }, { "epoch": 0.984653291094685, "grad_norm": 1.946688175201416, "learning_rate": 8.646074880265031e-09, "loss": 0.6711, "step": 13987 }, { "epoch": 0.984723688841957, "grad_norm": 1.7648195028305054, "learning_rate": 8.566951169604597e-09, "loss": 0.6777, "step": 13988 }, { "epoch": 0.9847940865892292, "grad_norm": 1.8309361934661865, "learning_rate": 8.488190949879515e-09, "loss": 0.6544, "step": 13989 }, { "epoch": 0.9848644843365012, "grad_norm": 2.323896884918213, "learning_rate": 8.409794225186173e-09, "loss": 0.7401, "step": 13990 }, { "epoch": 0.9849348820837733, "grad_norm": 1.9718310832977295, "learning_rate": 8.331760999598425e-09, "loss": 0.6844, "step": 13991 }, { "epoch": 0.9850052798310454, "grad_norm": 2.058990955352783, "learning_rate": 8.254091277175357e-09, "loss": 0.5444, "step": 13992 }, { "epoch": 0.9850756775783175, "grad_norm": 1.3400294780731201, "learning_rate": 8.176785061953517e-09, "loss": 0.7251, "step": 13993 }, { "epoch": 0.9851460753255896, "grad_norm": 2.042553663253784, "learning_rate": 8.099842357953912e-09, "loss": 0.5357, "step": 13994 }, { "epoch": 0.9852164730728616, "grad_norm": 2.0574820041656494, "learning_rate": 8.023263169174232e-09, "loss": 0.5628, "step": 13995 }, { "epoch": 0.9852868708201338, "grad_norm": 1.9616235494613647, "learning_rate": 7.947047499598958e-09, "loss": 0.6786, "step": 13996 }, { "epoch": 0.9853572685674058, "grad_norm": 1.701490879058838, "learning_rate": 7.8711953531877e-09, "loss": 0.5749, "step": 13997 }, { "epoch": 0.9854276663146779, "grad_norm": 2.7846617698669434, "learning_rate": 7.795706733886076e-09, "loss": 0.6449, "step": 13998 }, { "epoch": 0.9854980640619501, "grad_norm": 2.211088180541992, "learning_rate": 7.720581645618729e-09, "loss": 0.5475, "step": 13999 }, { "epoch": 0.9855684618092221, "grad_norm": 2.5922281742095947, "learning_rate": 7.64582009229009e-09, "loss": 0.7724, "step": 14000 }, { "epoch": 0.9856388595564942, "grad_norm": 2.1484062671661377, "learning_rate": 7.57142207778827e-09, "loss": 0.6083, "step": 14001 }, { "epoch": 0.9857092573037662, "grad_norm": 1.7994557619094849, "learning_rate": 7.497387605980399e-09, "loss": 0.5999, "step": 14002 }, { "epoch": 0.9857796550510384, "grad_norm": 1.8114882707595825, "learning_rate": 7.423716680716507e-09, "loss": 0.6932, "step": 14003 }, { "epoch": 0.9858500527983105, "grad_norm": 1.9373490810394287, "learning_rate": 7.3504093058264214e-09, "loss": 0.6656, "step": 14004 }, { "epoch": 0.9859204505455825, "grad_norm": 1.9877787828445435, "learning_rate": 7.277465485122092e-09, "loss": 0.5325, "step": 14005 }, { "epoch": 0.9859908482928547, "grad_norm": 2.1145706176757812, "learning_rate": 7.204885222394486e-09, "loss": 0.6834, "step": 14006 }, { "epoch": 0.9860612460401267, "grad_norm": 2.020113945007324, "learning_rate": 7.132668521418251e-09, "loss": 0.703, "step": 14007 }, { "epoch": 0.9861316437873988, "grad_norm": 2.0172245502471924, "learning_rate": 7.06081538594705e-09, "loss": 0.6896, "step": 14008 }, { "epoch": 0.9862020415346708, "grad_norm": 2.1435389518737793, "learning_rate": 6.989325819718228e-09, "loss": 0.6335, "step": 14009 }, { "epoch": 0.986272439281943, "grad_norm": 4.499448776245117, "learning_rate": 6.918199826447368e-09, "loss": 0.6926, "step": 14010 }, { "epoch": 0.9863428370292151, "grad_norm": 2.1330573558807373, "learning_rate": 6.8474374098321795e-09, "loss": 0.5628, "step": 14011 }, { "epoch": 0.9864132347764871, "grad_norm": 1.944887638092041, "learning_rate": 6.777038573552496e-09, "loss": 0.708, "step": 14012 }, { "epoch": 0.9864836325237593, "grad_norm": 2.613743782043457, "learning_rate": 6.7070033212679455e-09, "loss": 0.6284, "step": 14013 }, { "epoch": 0.9865540302710313, "grad_norm": 1.9081488847732544, "learning_rate": 6.637331656619505e-09, "loss": 0.7146, "step": 14014 }, { "epoch": 0.9866244280183034, "grad_norm": 1.8610341548919678, "learning_rate": 6.568023583230276e-09, "loss": 0.5814, "step": 14015 }, { "epoch": 0.9866948257655755, "grad_norm": 2.2371318340301514, "learning_rate": 6.499079104702377e-09, "loss": 0.6816, "step": 14016 }, { "epoch": 0.9867652235128476, "grad_norm": 1.9451833963394165, "learning_rate": 6.430498224621605e-09, "loss": 0.6729, "step": 14017 }, { "epoch": 0.9868356212601197, "grad_norm": 1.9928584098815918, "learning_rate": 6.362280946552778e-09, "loss": 0.6279, "step": 14018 }, { "epoch": 0.9869060190073917, "grad_norm": 1.9561370611190796, "learning_rate": 6.2944272740428355e-09, "loss": 0.5764, "step": 14019 }, { "epoch": 0.9869764167546639, "grad_norm": 1.652284860610962, "learning_rate": 6.226937210620065e-09, "loss": 0.7394, "step": 14020 }, { "epoch": 0.987046814501936, "grad_norm": 2.2796897888183594, "learning_rate": 6.159810759791773e-09, "loss": 0.5876, "step": 14021 }, { "epoch": 0.987117212249208, "grad_norm": 1.7181034088134766, "learning_rate": 6.093047925049721e-09, "loss": 0.5474, "step": 14022 }, { "epoch": 0.9871876099964801, "grad_norm": 2.130495309829712, "learning_rate": 6.026648709863913e-09, "loss": 0.5941, "step": 14023 }, { "epoch": 0.9872580077437522, "grad_norm": 2.361323118209839, "learning_rate": 5.960613117686475e-09, "loss": 0.7427, "step": 14024 }, { "epoch": 0.9873284054910243, "grad_norm": 1.876387357711792, "learning_rate": 5.8949411519508834e-09, "loss": 0.6727, "step": 14025 }, { "epoch": 0.9873988032382964, "grad_norm": 2.15578556060791, "learning_rate": 5.829632816071961e-09, "loss": 0.6189, "step": 14026 }, { "epoch": 0.9874692009855685, "grad_norm": 2.12323260307312, "learning_rate": 5.7646881134443275e-09, "loss": 0.6963, "step": 14027 }, { "epoch": 0.9875395987328406, "grad_norm": 1.783199429512024, "learning_rate": 5.700107047445502e-09, "loss": 0.685, "step": 14028 }, { "epoch": 0.9876099964801126, "grad_norm": 2.0257086753845215, "learning_rate": 5.635889621432022e-09, "loss": 0.6085, "step": 14029 }, { "epoch": 0.9876803942273847, "grad_norm": 1.8655483722686768, "learning_rate": 5.5720358387433276e-09, "loss": 0.5587, "step": 14030 }, { "epoch": 0.9877507919746568, "grad_norm": 1.9546794891357422, "learning_rate": 5.508545702698653e-09, "loss": 0.6426, "step": 14031 }, { "epoch": 0.9878211897219289, "grad_norm": 2.264629602432251, "learning_rate": 5.4454192165993565e-09, "loss": 0.6619, "step": 14032 }, { "epoch": 0.987891587469201, "grad_norm": 1.9713423252105713, "learning_rate": 5.382656383727369e-09, "loss": 0.5958, "step": 14033 }, { "epoch": 0.987961985216473, "grad_norm": 2.0198304653167725, "learning_rate": 5.3202572073459684e-09, "loss": 0.7124, "step": 14034 }, { "epoch": 0.9880323829637452, "grad_norm": 2.1491012573242188, "learning_rate": 5.258221690699783e-09, "loss": 0.6389, "step": 14035 }, { "epoch": 0.9881027807110172, "grad_norm": 2.0994949340820312, "learning_rate": 5.196549837012454e-09, "loss": 0.6081, "step": 14036 }, { "epoch": 0.9881731784582893, "grad_norm": 1.6076310873031616, "learning_rate": 5.135241649492084e-09, "loss": 0.6784, "step": 14037 }, { "epoch": 0.9882435762055615, "grad_norm": 2.304334878921509, "learning_rate": 5.074297131325011e-09, "loss": 0.7208, "step": 14038 }, { "epoch": 0.9883139739528335, "grad_norm": 1.677261233329773, "learning_rate": 5.013716285679704e-09, "loss": 0.5862, "step": 14039 }, { "epoch": 0.9883843717001056, "grad_norm": 1.9720070362091064, "learning_rate": 4.9534991157067495e-09, "loss": 0.6894, "step": 14040 }, { "epoch": 0.9884547694473776, "grad_norm": 1.5806623697280884, "learning_rate": 4.893645624537313e-09, "loss": 0.5267, "step": 14041 }, { "epoch": 0.9885251671946498, "grad_norm": 2.0200562477111816, "learning_rate": 4.834155815281571e-09, "loss": 0.6403, "step": 14042 }, { "epoch": 0.9885955649419219, "grad_norm": 2.1230628490448, "learning_rate": 4.775029691033383e-09, "loss": 0.6975, "step": 14043 }, { "epoch": 0.9886659626891939, "grad_norm": 1.834315299987793, "learning_rate": 4.716267254866402e-09, "loss": 0.5647, "step": 14044 }, { "epoch": 0.9887363604364661, "grad_norm": 1.9170018434524536, "learning_rate": 4.657868509836405e-09, "loss": 0.5133, "step": 14045 }, { "epoch": 0.9888067581837381, "grad_norm": 1.9031949043273926, "learning_rate": 4.599833458978187e-09, "loss": 0.5852, "step": 14046 }, { "epoch": 0.9888771559310102, "grad_norm": 4.969810962677002, "learning_rate": 4.542162105310998e-09, "loss": 0.7106, "step": 14047 }, { "epoch": 0.9889475536782822, "grad_norm": 1.840739130973816, "learning_rate": 4.484854451832332e-09, "loss": 0.6549, "step": 14048 }, { "epoch": 0.9890179514255544, "grad_norm": 1.9708251953125, "learning_rate": 4.4279105015218035e-09, "loss": 0.648, "step": 14049 }, { "epoch": 0.9890883491728265, "grad_norm": 2.1382148265838623, "learning_rate": 4.3713302573388234e-09, "loss": 0.6916, "step": 14050 }, { "epoch": 0.9891587469200985, "grad_norm": 1.7827239036560059, "learning_rate": 4.315113722226482e-09, "loss": 0.6595, "step": 14051 }, { "epoch": 0.9892291446673707, "grad_norm": 2.6011300086975098, "learning_rate": 4.259260899106887e-09, "loss": 0.706, "step": 14052 }, { "epoch": 0.9892995424146427, "grad_norm": 1.8428279161453247, "learning_rate": 4.2037717908842695e-09, "loss": 0.5553, "step": 14053 }, { "epoch": 0.9893699401619148, "grad_norm": 1.9215117692947388, "learning_rate": 4.148646400443434e-09, "loss": 0.6553, "step": 14054 }, { "epoch": 0.989440337909187, "grad_norm": 2.105013847351074, "learning_rate": 4.093884730649755e-09, "loss": 0.6031, "step": 14055 }, { "epoch": 0.989510735656459, "grad_norm": 1.760157823562622, "learning_rate": 4.039486784351509e-09, "loss": 0.6701, "step": 14056 }, { "epoch": 0.9895811334037311, "grad_norm": 1.9528449773788452, "learning_rate": 3.9854525643759905e-09, "loss": 0.6564, "step": 14057 }, { "epoch": 0.9896515311510031, "grad_norm": 2.08646821975708, "learning_rate": 3.931782073532619e-09, "loss": 0.649, "step": 14058 }, { "epoch": 0.9897219288982753, "grad_norm": 2.053027391433716, "learning_rate": 3.878475314612162e-09, "loss": 0.4688, "step": 14059 }, { "epoch": 0.9897923266455474, "grad_norm": 1.674824833869934, "learning_rate": 3.825532290385958e-09, "loss": 0.6578, "step": 14060 }, { "epoch": 0.9898627243928194, "grad_norm": 2.2559123039245605, "learning_rate": 3.772953003605917e-09, "loss": 0.6346, "step": 14061 }, { "epoch": 0.9899331221400915, "grad_norm": 1.688109040260315, "learning_rate": 3.7207374570068506e-09, "loss": 0.5361, "step": 14062 }, { "epoch": 0.9900035198873636, "grad_norm": 2.0562539100646973, "learning_rate": 3.6688856533025893e-09, "loss": 0.7315, "step": 14063 }, { "epoch": 0.9900739176346357, "grad_norm": 1.9297562837600708, "learning_rate": 3.617397595189087e-09, "loss": 0.7031, "step": 14064 }, { "epoch": 0.9901443153819077, "grad_norm": 2.104715347290039, "learning_rate": 3.5662732853428692e-09, "loss": 0.6194, "step": 14065 }, { "epoch": 0.9902147131291799, "grad_norm": 2.136404275894165, "learning_rate": 3.5155127264233645e-09, "loss": 0.6665, "step": 14066 }, { "epoch": 0.990285110876452, "grad_norm": 1.848243236541748, "learning_rate": 3.4651159210682402e-09, "loss": 0.5983, "step": 14067 }, { "epoch": 0.990355508623724, "grad_norm": 2.237325429916382, "learning_rate": 3.4150828718980676e-09, "loss": 0.6312, "step": 14068 }, { "epoch": 0.9904259063709961, "grad_norm": 1.7631250619888306, "learning_rate": 3.3654135815147647e-09, "loss": 0.7385, "step": 14069 }, { "epoch": 0.9904963041182682, "grad_norm": 2.184685468673706, "learning_rate": 3.316108052500044e-09, "loss": 0.6602, "step": 14070 }, { "epoch": 0.9905667018655403, "grad_norm": 1.9652299880981445, "learning_rate": 3.267166287416967e-09, "loss": 0.6241, "step": 14071 }, { "epoch": 0.9906370996128124, "grad_norm": 2.068751811981201, "learning_rate": 3.2185882888107196e-09, "loss": 0.6031, "step": 14072 }, { "epoch": 0.9907074973600845, "grad_norm": 1.6743046045303345, "learning_rate": 3.1703740592070593e-09, "loss": 0.5726, "step": 14073 }, { "epoch": 0.9907778951073566, "grad_norm": 2.113968849182129, "learning_rate": 3.1225236011115376e-09, "loss": 0.6491, "step": 14074 }, { "epoch": 0.9908482928546286, "grad_norm": 1.942960262298584, "learning_rate": 3.075036917013385e-09, "loss": 0.6085, "step": 14075 }, { "epoch": 0.9909186906019007, "grad_norm": 2.0166244506835938, "learning_rate": 3.02791400938085e-09, "loss": 0.6402, "step": 14076 }, { "epoch": 0.9909890883491729, "grad_norm": 2.287775754928589, "learning_rate": 2.981154880664305e-09, "loss": 0.6096, "step": 14077 }, { "epoch": 0.9910594860964449, "grad_norm": 2.3151180744171143, "learning_rate": 2.934759533293918e-09, "loss": 0.6723, "step": 14078 }, { "epoch": 0.991129883843717, "grad_norm": 3.1817901134490967, "learning_rate": 2.888727969681981e-09, "loss": 0.5293, "step": 14079 }, { "epoch": 0.991200281590989, "grad_norm": 1.7909828424453735, "learning_rate": 2.8430601922221352e-09, "loss": 0.5784, "step": 14080 }, { "epoch": 0.9912706793382612, "grad_norm": 2.031649112701416, "learning_rate": 2.7977562032885927e-09, "loss": 0.5314, "step": 14081 }, { "epoch": 0.9913410770855333, "grad_norm": 2.5910756587982178, "learning_rate": 2.752816005236913e-09, "loss": 0.6443, "step": 14082 }, { "epoch": 0.9914114748328053, "grad_norm": 1.8849961757659912, "learning_rate": 2.7082396004040054e-09, "loss": 0.6841, "step": 14083 }, { "epoch": 0.9914818725800775, "grad_norm": 1.9312026500701904, "learning_rate": 2.6640269911057945e-09, "loss": 0.6617, "step": 14084 }, { "epoch": 0.9915522703273495, "grad_norm": 1.8406873941421509, "learning_rate": 2.6201781796426626e-09, "loss": 0.5606, "step": 14085 }, { "epoch": 0.9916226680746216, "grad_norm": 1.8848075866699219, "learning_rate": 2.5766931682932315e-09, "loss": 0.6702, "step": 14086 }, { "epoch": 0.9916930658218936, "grad_norm": 1.9048500061035156, "learning_rate": 2.5335719593190252e-09, "loss": 0.5284, "step": 14087 }, { "epoch": 0.9917634635691658, "grad_norm": 2.1925032138824463, "learning_rate": 2.4908145549621397e-09, "loss": 0.5853, "step": 14088 }, { "epoch": 0.9918338613164379, "grad_norm": 2.0724008083343506, "learning_rate": 2.448420957444464e-09, "loss": 0.6931, "step": 14089 }, { "epoch": 0.9919042590637099, "grad_norm": 1.7706056833267212, "learning_rate": 2.4063911689707897e-09, "loss": 0.5902, "step": 14090 }, { "epoch": 0.9919746568109821, "grad_norm": 1.7925007343292236, "learning_rate": 2.36472519172648e-09, "loss": 0.5885, "step": 14091 }, { "epoch": 0.9920450545582541, "grad_norm": 2.3087406158447266, "learning_rate": 2.3234230278774693e-09, "loss": 0.6964, "step": 14092 }, { "epoch": 0.9921154523055262, "grad_norm": 1.9109275341033936, "learning_rate": 2.2824846795710398e-09, "loss": 0.6631, "step": 14093 }, { "epoch": 0.9921858500527984, "grad_norm": 1.6266261339187622, "learning_rate": 2.2419101489358216e-09, "loss": 0.716, "step": 14094 }, { "epoch": 0.9922562478000704, "grad_norm": 2.133861780166626, "learning_rate": 2.2016994380810174e-09, "loss": 0.6442, "step": 14095 }, { "epoch": 0.9923266455473425, "grad_norm": 1.495941162109375, "learning_rate": 2.1618525490979533e-09, "loss": 0.7063, "step": 14096 }, { "epoch": 0.9923970432946145, "grad_norm": 1.9309149980545044, "learning_rate": 2.122369484057751e-09, "loss": 0.5599, "step": 14097 }, { "epoch": 0.9924674410418867, "grad_norm": 1.9421753883361816, "learning_rate": 2.0832502450128796e-09, "loss": 0.6043, "step": 14098 }, { "epoch": 0.9925378387891588, "grad_norm": 2.603945732116699, "learning_rate": 2.044494833997157e-09, "loss": 0.6254, "step": 14099 }, { "epoch": 0.9926082365364308, "grad_norm": 1.834574818611145, "learning_rate": 2.0061032530265254e-09, "loss": 0.5427, "step": 14100 }, { "epoch": 0.992678634283703, "grad_norm": 2.470163106918335, "learning_rate": 1.968075504095945e-09, "loss": 0.6672, "step": 14101 }, { "epoch": 0.992749032030975, "grad_norm": 2.352792978286743, "learning_rate": 1.9304115891832783e-09, "loss": 0.5693, "step": 14102 }, { "epoch": 0.9928194297782471, "grad_norm": 2.573340892791748, "learning_rate": 1.8931115102461816e-09, "loss": 0.5922, "step": 14103 }, { "epoch": 0.9928898275255191, "grad_norm": 1.9745357036590576, "learning_rate": 1.856175269224436e-09, "loss": 0.6376, "step": 14104 }, { "epoch": 0.9929602252727913, "grad_norm": 2.1511168479919434, "learning_rate": 1.8196028680376176e-09, "loss": 0.5702, "step": 14105 }, { "epoch": 0.9930306230200634, "grad_norm": 1.8787190914154053, "learning_rate": 1.7833943085874271e-09, "loss": 0.6719, "step": 14106 }, { "epoch": 0.9931010207673354, "grad_norm": 2.222062826156616, "learning_rate": 1.747549592756914e-09, "loss": 0.6474, "step": 14107 }, { "epoch": 0.9931714185146076, "grad_norm": 1.794286847114563, "learning_rate": 1.7120687224089215e-09, "loss": 0.5515, "step": 14108 }, { "epoch": 0.9932418162618796, "grad_norm": 1.915016770362854, "learning_rate": 1.676951699388418e-09, "loss": 0.636, "step": 14109 }, { "epoch": 0.9933122140091517, "grad_norm": 1.786875605583191, "learning_rate": 1.642198525521721e-09, "loss": 0.5618, "step": 14110 }, { "epoch": 0.9933826117564238, "grad_norm": 2.170663356781006, "learning_rate": 1.6078092026149404e-09, "loss": 0.6753, "step": 14111 }, { "epoch": 0.9934530095036959, "grad_norm": 1.7235007286071777, "learning_rate": 1.573783732456313e-09, "loss": 0.6654, "step": 14112 }, { "epoch": 0.993523407250968, "grad_norm": 1.6433396339416504, "learning_rate": 1.5401221168146461e-09, "loss": 0.6042, "step": 14113 }, { "epoch": 0.99359380499824, "grad_norm": 1.9440629482269287, "learning_rate": 1.5068243574408723e-09, "loss": 0.6908, "step": 14114 }, { "epoch": 0.9936642027455121, "grad_norm": 2.149019241333008, "learning_rate": 1.4738904560649413e-09, "loss": 0.7371, "step": 14115 }, { "epoch": 0.9937346004927843, "grad_norm": 2.410818338394165, "learning_rate": 1.441320414399705e-09, "loss": 0.6622, "step": 14116 }, { "epoch": 0.9938049982400563, "grad_norm": 1.860845685005188, "learning_rate": 1.4091142341385866e-09, "loss": 0.633, "step": 14117 }, { "epoch": 0.9938753959873284, "grad_norm": 2.246426582336426, "learning_rate": 1.3772719169555802e-09, "loss": 0.6925, "step": 14118 }, { "epoch": 0.9939457937346005, "grad_norm": 1.945698857307434, "learning_rate": 1.345793464506806e-09, "loss": 0.6595, "step": 14119 }, { "epoch": 0.9940161914818726, "grad_norm": 1.9650613069534302, "learning_rate": 1.3146788784289543e-09, "loss": 0.6548, "step": 14120 }, { "epoch": 0.9940865892291446, "grad_norm": 1.8250097036361694, "learning_rate": 1.28392816033851e-09, "loss": 0.6318, "step": 14121 }, { "epoch": 0.9941569869764167, "grad_norm": 1.8473412990570068, "learning_rate": 1.2535413118356375e-09, "loss": 0.565, "step": 14122 }, { "epoch": 0.9942273847236889, "grad_norm": 1.6056463718414307, "learning_rate": 1.2235183344995182e-09, "loss": 0.6967, "step": 14123 }, { "epoch": 0.9942977824709609, "grad_norm": 2.119598865509033, "learning_rate": 1.1938592298914585e-09, "loss": 0.6024, "step": 14124 }, { "epoch": 0.994368180218233, "grad_norm": 2.2559263706207275, "learning_rate": 1.1645639995525592e-09, "loss": 0.6745, "step": 14125 }, { "epoch": 0.994438577965505, "grad_norm": 2.1920175552368164, "learning_rate": 1.1356326450068232e-09, "loss": 0.7412, "step": 14126 }, { "epoch": 0.9945089757127772, "grad_norm": 2.134639263153076, "learning_rate": 1.107065167758048e-09, "loss": 0.6635, "step": 14127 }, { "epoch": 0.9945793734600493, "grad_norm": 1.9435850381851196, "learning_rate": 1.0788615692913783e-09, "loss": 0.6596, "step": 14128 }, { "epoch": 0.9946497712073213, "grad_norm": 1.8507931232452393, "learning_rate": 1.0510218510733083e-09, "loss": 0.5353, "step": 14129 }, { "epoch": 0.9947201689545935, "grad_norm": 2.150902032852173, "learning_rate": 1.0235460145516795e-09, "loss": 0.6313, "step": 14130 }, { "epoch": 0.9947905667018655, "grad_norm": 1.600439190864563, "learning_rate": 9.964340611541278e-10, "loss": 0.5963, "step": 14131 }, { "epoch": 0.9948609644491376, "grad_norm": 2.304680347442627, "learning_rate": 9.696859922904144e-10, "loss": 0.642, "step": 14132 }, { "epoch": 0.9949313621964098, "grad_norm": 2.050065279006958, "learning_rate": 9.433018093516488e-10, "loss": 0.681, "step": 14133 }, { "epoch": 0.9950017599436818, "grad_norm": 2.0588579177856445, "learning_rate": 9.172815137095114e-10, "loss": 0.6598, "step": 14134 }, { "epoch": 0.9950721576909539, "grad_norm": 2.595705986022949, "learning_rate": 8.916251067162539e-10, "loss": 0.6005, "step": 14135 }, { "epoch": 0.9951425554382259, "grad_norm": 2.1721200942993164, "learning_rate": 8.663325897070306e-10, "loss": 0.6764, "step": 14136 }, { "epoch": 0.9952129531854981, "grad_norm": 1.7241231203079224, "learning_rate": 8.414039639952353e-10, "loss": 0.6472, "step": 14137 }, { "epoch": 0.9952833509327702, "grad_norm": 2.1990201473236084, "learning_rate": 8.168392308779415e-10, "loss": 0.6975, "step": 14138 }, { "epoch": 0.9953537486800422, "grad_norm": 2.0451316833496094, "learning_rate": 7.926383916320167e-10, "loss": 0.5878, "step": 14139 }, { "epoch": 0.9954241464273144, "grad_norm": 2.0774171352386475, "learning_rate": 7.688014475148997e-10, "loss": 0.6085, "step": 14140 }, { "epoch": 0.9954945441745864, "grad_norm": 1.7620710134506226, "learning_rate": 7.453283997677085e-10, "loss": 0.5158, "step": 14141 }, { "epoch": 0.9955649419218585, "grad_norm": 1.9479080438613892, "learning_rate": 7.222192496090241e-10, "loss": 0.6348, "step": 14142 }, { "epoch": 0.9956353396691305, "grad_norm": 2.004749298095703, "learning_rate": 6.994739982418841e-10, "loss": 0.6239, "step": 14143 }, { "epoch": 0.9957057374164027, "grad_norm": 2.126868963241577, "learning_rate": 6.770926468475658e-10, "loss": 0.66, "step": 14144 }, { "epoch": 0.9957761351636748, "grad_norm": 1.7352176904678345, "learning_rate": 6.55075196590249e-10, "loss": 0.6451, "step": 14145 }, { "epoch": 0.9958465329109468, "grad_norm": 1.9501675367355347, "learning_rate": 6.334216486146848e-10, "loss": 0.5504, "step": 14146 }, { "epoch": 0.995916930658219, "grad_norm": 2.2541067600250244, "learning_rate": 6.121320040461953e-10, "loss": 0.7119, "step": 14147 }, { "epoch": 0.995987328405491, "grad_norm": 2.080595016479492, "learning_rate": 5.912062639922277e-10, "loss": 0.6392, "step": 14148 }, { "epoch": 0.9960577261527631, "grad_norm": 1.8793208599090576, "learning_rate": 5.706444295400237e-10, "loss": 0.6152, "step": 14149 }, { "epoch": 0.9961281239000352, "grad_norm": 1.77863609790802, "learning_rate": 5.504465017597271e-10, "loss": 0.553, "step": 14150 }, { "epoch": 0.9961985216473073, "grad_norm": 1.911460280418396, "learning_rate": 5.306124817004986e-10, "loss": 0.726, "step": 14151 }, { "epoch": 0.9962689193945794, "grad_norm": 1.8550230264663696, "learning_rate": 5.111423703936246e-10, "loss": 0.6247, "step": 14152 }, { "epoch": 0.9963393171418514, "grad_norm": 1.716148853302002, "learning_rate": 4.920361688517394e-10, "loss": 0.6444, "step": 14153 }, { "epoch": 0.9964097148891236, "grad_norm": 1.9871270656585693, "learning_rate": 4.732938780680485e-10, "loss": 0.6804, "step": 14154 }, { "epoch": 0.9964801126363957, "grad_norm": 2.0309035778045654, "learning_rate": 4.549154990171056e-10, "loss": 0.7381, "step": 14155 }, { "epoch": 0.9965505103836677, "grad_norm": 1.902988076210022, "learning_rate": 4.3690103265403567e-10, "loss": 0.6604, "step": 14156 }, { "epoch": 0.9966209081309398, "grad_norm": 2.0539450645446777, "learning_rate": 4.19250479916089e-10, "loss": 0.6483, "step": 14157 }, { "epoch": 0.9966913058782119, "grad_norm": 1.8673015832901, "learning_rate": 4.019638417203097e-10, "loss": 0.6771, "step": 14158 }, { "epoch": 0.996761703625484, "grad_norm": 2.5850186347961426, "learning_rate": 3.850411189650904e-10, "loss": 0.6324, "step": 14159 }, { "epoch": 0.996832101372756, "grad_norm": 1.9290416240692139, "learning_rate": 3.68482312531726e-10, "loss": 0.5, "step": 14160 }, { "epoch": 0.9969024991200282, "grad_norm": 2.1429805755615234, "learning_rate": 3.5228742327975124e-10, "loss": 0.7525, "step": 14161 }, { "epoch": 0.9969728968673003, "grad_norm": 2.008741617202759, "learning_rate": 3.3645645205160334e-10, "loss": 0.6475, "step": 14162 }, { "epoch": 0.9970432946145723, "grad_norm": 1.93205726146698, "learning_rate": 3.2098939967029063e-10, "loss": 0.7307, "step": 14163 }, { "epoch": 0.9971136923618444, "grad_norm": 2.1158223152160645, "learning_rate": 3.058862669393925e-10, "loss": 0.6162, "step": 14164 }, { "epoch": 0.9971840901091165, "grad_norm": 1.6570450067520142, "learning_rate": 2.911470546461681e-10, "loss": 0.6142, "step": 14165 }, { "epoch": 0.9972544878563886, "grad_norm": 1.985223650932312, "learning_rate": 2.7677176355456187e-10, "loss": 0.6387, "step": 14166 }, { "epoch": 0.9973248856036607, "grad_norm": 2.378422975540161, "learning_rate": 2.627603944129753e-10, "loss": 0.5987, "step": 14167 }, { "epoch": 0.9973952833509327, "grad_norm": 1.5257035493850708, "learning_rate": 2.4911294794960345e-10, "loss": 0.7169, "step": 14168 }, { "epoch": 0.9974656810982049, "grad_norm": 1.816434383392334, "learning_rate": 2.358294248747672e-10, "loss": 0.5912, "step": 14169 }, { "epoch": 0.9975360788454769, "grad_norm": 1.9785574674606323, "learning_rate": 2.229098258785811e-10, "loss": 0.6445, "step": 14170 }, { "epoch": 0.997606476592749, "grad_norm": 1.8715665340423584, "learning_rate": 2.1035415163250803e-10, "loss": 0.6614, "step": 14171 }, { "epoch": 0.9976768743400212, "grad_norm": 1.8300496339797974, "learning_rate": 1.9816240278935914e-10, "loss": 0.6502, "step": 14172 }, { "epoch": 0.9977472720872932, "grad_norm": 1.8629519939422607, "learning_rate": 1.8633457998329385e-10, "loss": 0.7724, "step": 14173 }, { "epoch": 0.9978176698345653, "grad_norm": 1.5848112106323242, "learning_rate": 1.7487068382904257e-10, "loss": 0.6106, "step": 14174 }, { "epoch": 0.9978880675818373, "grad_norm": 2.324415683746338, "learning_rate": 1.6377071492268413e-10, "loss": 0.6038, "step": 14175 }, { "epoch": 0.9979584653291095, "grad_norm": 2.0654568672180176, "learning_rate": 1.5303467384086832e-10, "loss": 0.6651, "step": 14176 }, { "epoch": 0.9980288630763815, "grad_norm": 1.7763820886611938, "learning_rate": 1.4266256114237042e-10, "loss": 0.6066, "step": 14177 }, { "epoch": 0.9980992608236536, "grad_norm": 5.992873191833496, "learning_rate": 1.3265437736653672e-10, "loss": 0.6407, "step": 14178 }, { "epoch": 0.9981696585709258, "grad_norm": 2.625185251235962, "learning_rate": 1.230101230340619e-10, "loss": 0.6763, "step": 14179 }, { "epoch": 0.9982400563181978, "grad_norm": 2.2489564418792725, "learning_rate": 1.1372979864465727e-10, "loss": 0.7371, "step": 14180 }, { "epoch": 0.9983104540654699, "grad_norm": 2.240708589553833, "learning_rate": 1.0481340468249112e-10, "loss": 0.6033, "step": 14181 }, { "epoch": 0.9983808518127419, "grad_norm": 2.5129106044769287, "learning_rate": 9.62609416107485e-11, "loss": 0.5712, "step": 14182 }, { "epoch": 0.9984512495600141, "grad_norm": 1.9680602550506592, "learning_rate": 8.807240987318554e-11, "loss": 0.551, "step": 14183 }, { "epoch": 0.9985216473072862, "grad_norm": 2.267918586730957, "learning_rate": 8.024780989723812e-11, "loss": 0.6886, "step": 14184 }, { "epoch": 0.9985920450545582, "grad_norm": 2.017542600631714, "learning_rate": 7.278714208780457e-11, "loss": 0.703, "step": 14185 }, { "epoch": 0.9986624428018304, "grad_norm": 2.2735044956207275, "learning_rate": 6.56904068342401e-11, "loss": 0.6792, "step": 14186 }, { "epoch": 0.9987328405491024, "grad_norm": 1.860819697380066, "learning_rate": 5.895760450413955e-11, "loss": 0.5716, "step": 14187 }, { "epoch": 0.9988032382963745, "grad_norm": 1.9798089265823364, "learning_rate": 5.258873544877751e-11, "loss": 0.5898, "step": 14188 }, { "epoch": 0.9988736360436467, "grad_norm": 1.7142122983932495, "learning_rate": 4.658379999922246e-11, "loss": 0.5734, "step": 14189 }, { "epoch": 0.9989440337909187, "grad_norm": 2.1595804691314697, "learning_rate": 4.094279846711402e-11, "loss": 0.7522, "step": 14190 }, { "epoch": 0.9990144315381908, "grad_norm": 1.7425943613052368, "learning_rate": 3.5665731146217184e-11, "loss": 0.5077, "step": 14191 }, { "epoch": 0.9990848292854628, "grad_norm": 2.145644187927246, "learning_rate": 3.075259831009092e-11, "loss": 0.6717, "step": 14192 }, { "epoch": 0.999155227032735, "grad_norm": 2.2604100704193115, "learning_rate": 2.620340021441958e-11, "loss": 0.5737, "step": 14193 }, { "epoch": 0.9992256247800071, "grad_norm": 1.644917368888855, "learning_rate": 2.2018137097012947e-11, "loss": 0.6543, "step": 14194 }, { "epoch": 0.9992960225272791, "grad_norm": 2.2545053958892822, "learning_rate": 1.8196809173920413e-11, "loss": 0.5299, "step": 14195 }, { "epoch": 0.9993664202745512, "grad_norm": 2.11588716506958, "learning_rate": 1.473941664409395e-11, "loss": 0.6384, "step": 14196 }, { "epoch": 0.9994368180218233, "grad_norm": 1.6545628309249878, "learning_rate": 1.1645959687833773e-11, "loss": 0.5867, "step": 14197 }, { "epoch": 0.9995072157690954, "grad_norm": 1.9196447134017944, "learning_rate": 8.916438465234044e-12, "loss": 0.6798, "step": 14198 }, { "epoch": 0.9995776135163674, "grad_norm": 2.4446861743927, "learning_rate": 6.550853119291488e-12, "loss": 0.6521, "step": 14199 }, { "epoch": 0.9996480112636396, "grad_norm": 2.23484206199646, "learning_rate": 4.549203772019616e-12, "loss": 0.6638, "step": 14200 }, { "epoch": 0.9997184090109117, "grad_norm": 1.748366117477417, "learning_rate": 2.9114905275573477e-12, "loss": 0.6011, "step": 14201 }, { "epoch": 0.9997888067581837, "grad_norm": 1.6977177858352661, "learning_rate": 1.637713471391855e-12, "loss": 0.463, "step": 14202 }, { "epoch": 0.9998592045054558, "grad_norm": 1.8524410724639893, "learning_rate": 7.278726695814086e-13, "loss": 0.6481, "step": 14203 }, { "epoch": 0.9999296022527279, "grad_norm": 1.696658730506897, "learning_rate": 1.8196816953253147e-13, "loss": 0.5645, "step": 14204 }, { "epoch": 1.0, "grad_norm": 1.9762729406356812, "learning_rate": 0.0, "loss": 0.6818, "step": 14205 }, { "epoch": 1.0, "step": 14205, "total_flos": 6.540228112607609e+18, "train_loss": 0.3734022373319805, "train_runtime": 132908.7237, "train_samples_per_second": 10.26, "train_steps_per_second": 0.107 } ], "logging_steps": 1.0, "max_steps": 14205, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.540228112607609e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }